On Sun, 24 Feb 2013 10:21:40 +0200
Dan Ben Yosef <[email protected]> wrote:

> 1) if use -G or -D option : we obtain sl before doing perf query.
> 2) if no destination is given : we obtain sl for every pair 
> source-destination.
> 3) if no destination is given and use --skip-sl option : we don't obtain
> sl to all nodes in the fabric,in this case sl=0 for all node pairs.

--skip-sl should apply to all modes.  Sorry if I was not clear about that.

More comments below.

> 
> Signed-off-by: Dan Ben Yosef <[email protected]>
> ---
> Changes since v1:
>         Change 2 and 3: by default we obtain sl. When using skip-sl option
>         we don't obtain sl.
>       Add option to documentation.
> 
>  doc/man/ibqueryerrors.8.in |    3 ++
>  src/ibqueryerrors.c        |   83 ++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 84 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/man/ibqueryerrors.8.in b/doc/man/ibqueryerrors.8.in
> index 00b0f61..65e5206 100644
> --- a/doc/man/ibqueryerrors.8.in
> +++ b/doc/man/ibqueryerrors.8.in
> @@ -56,6 +56,9 @@ Report the port information.  This includes LID, port, 
> external port (if
>  applicable), link speed setting, remote GUID, remote port, remote external 
> port
>  (if applicable), and remote node description information.
>  .sp
> +\fB\-\-skip-sl\fP
> +Use the default sl for queries.
> +.sp
>  \fB\-\-data\fP
>  Include the optional transmit and receive data counters.
>  .sp

This should be added to doc/rst/ibqueryerrors.8.in.rst, and the output
generated from that should then be added to the git tree.

> diff --git a/src/ibqueryerrors.c b/src/ibqueryerrors.c
> index 6320972..c35cf6e 100644
> --- a/src/ibqueryerrors.c
> +++ b/src/ibqueryerrors.c
> @@ -55,11 +55,14 @@
>  #include <infiniband/mad.h>
>  
>  #include "ibdiag_common.h"
> +#include "ibdiag_sa.h"
>  
>  struct ibmad_port *ibmad_port;
>  static char *node_name_map_file = NULL;
>  static nn_map_t *node_name_map = NULL;
>  static char *load_cache_file = NULL;
> +static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
> +static int half_world_query = 1;
>  
>  int data_counters = 0;
>  int data_counters_only = 0;
> @@ -78,6 +81,8 @@ unsigned clear_errors = 0, clear_counts = 0, details = 0;
>  #define PRINT_ROUTER 0x4
>  #define PRINT_ALL 0xFF               /* all nodes default flag */
>  
> +#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)

This is fine for now, but it might be nice to have an option for this timeout
as well.

> +
>  struct {
>       int nodes_checked;
>       int bad_nodes;
> @@ -298,6 +303,51 @@ static int print_summary(void)
>       return (summary.bad_ports);
>  }
>  
> +static void insert_lid2sl_table(struct sa_query_result *r)
> +{
> +     unsigned int i;
> +     for (i = 0; i < r->result_cnt; i++) {
> +             ib_path_rec_t *p_pr = (ib_path_rec_t 
> *)sa_get_query_rec(r->p_result_madw, i);
> +             lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
> +     }
> +}
> +
> +static int path_record_query(int src_lid,int dest_lid)
> +{
> +     ib_path_rec_t pr;
> +     ib_net64_t comp_mask = 0;
> +     uint8_t reversible = 0;
> +     struct sa_handle * h;
> +
> +     h = sa_get_handle();
> +     ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
> +     memset(&pr, 0, sizeof(pr));
> +
> +     CHECK_AND_SET_VAL(src_lid, 16, 0, pr.slid, PR, SLID);
> +     CHECK_AND_SET_VAL(dest_lid, 16, 0, pr.dlid, PR, DLID);/*if dlid is 0 
> then we do half world query*/
> +     CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only 
> one PathRecord for each source and destination pair*/
> +     CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a 
> reversible path*/
> +     pr.num_path |= reversible << 7;
> +     struct sa_query_result result;
> +     int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
> +                        
> (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
> +                        &pr, sizeof(pr), &result);
> +     if (ret) {
> +             fprintf(stderr, "Query SA failed: %s; sa call path_query 
> failed\n", strerror(ret));
> +             return ret;
> +     }
> +     if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
> +             sa_report_err(result.status);
> +             ret = EIO;
> +             goto Exit;
> +     }
> +
> +     insert_lid2sl_table(&result);
> +Exit:
> +     sa_free_result_mad(&result);
> +     return ret;
> +}
> +
>  static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
>                         ibnd_node_t * node, char *node_name, int portnum,
>                         const char *attr_name, uint16_t attr_id,
> @@ -447,6 +497,8 @@ static int query_cap_mask(ib_portid_t * portid, char 
> *node_name, int portnum,
>       uint8_t pc[1024] = { 0 };
>       uint16_t rc_cap_mask;
>  
> +     portid->sl = lid2sl_table[portid->lid];
> +
>       /* PerfMgt ClassPortInfo is a required attribute */
>       if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
>                          ibmad_port)) {
> @@ -474,6 +526,8 @@ static int print_data_cnts(ib_portid_t * portid, uint16_t 
> cap_mask,
>  
>       memset(pc, 0, 1024);
>  
> +     portid->sl = lid2sl_table[portid->lid];
> +
>       if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | 
> IB_PM_EXT_WIDTH_NOIETF_SUP)) {
>               if (!pma_query_via(pc, portid, portnum, ibd_timeout,
>                                  IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
> @@ -543,6 +597,8 @@ static int print_errors(ib_portid_t * portid, uint16_t 
> cap_mask,
>       memset(pc, 0, 1024);
>       memset(pce, 0, 1024);
>  
> +     portid->sl = lid2sl_table[portid->lid];
> +
>       if (!pma_query_via(pc, portid, portnum, ibd_timeout,
>                          IB_GSI_PORT_COUNTERS, ibmad_port)) {
>               IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
> @@ -822,6 +878,9 @@ static int process_opt(void *context, int ch, char 
> *optarg)
>       case 9:
>               data_counters_only = 1;
>               break;
> +     case 10:
> +             half_world_query = 0;
> +             break;
>       case 'G':
>       case 'S':
>               port_guid_str = optarg;
> @@ -858,6 +917,8 @@ int main(int argc, char **argv)
>       ib_portid_t portid = { 0 };
>       int rc = 0;
>       ibnd_fabric_t *fabric = NULL;
> +     int self_lid = 0;
> +     int port = 0;
>  
>       int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
>               IB_PERFORMANCE_CLASS
> @@ -875,6 +936,7 @@ int main(int argc, char **argv)
>                "Same as \"-G\" for backward compatibility"},
>               {"Direct", 'D', 1, "<dr_path>",
>                "report the node containing the port specified by <dr_path>"},
> +             {"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
>               {"report-port", 'r', 0, NULL,
>                "report port link information"},
>               {"threshold-file", 8, 1, NULL,
> @@ -933,6 +995,11 @@ int main(int argc, char **argv)
>  
>       /* limit the scan the fabric around the target */
>       if (dr_path) {
> +             if (resolve_self(ibd_ca, ibd_ca_port, &portid, &port, 0) < 0) {
> +                     IBERROR("can't resolve self port %s", argv[0]);
> +                     goto close_port;
> +             }
> +             self_lid = portid.lid;

I think you should call this once, before the check for dr_path, and not again
below.  Resolving our local lid with resolve_self is not too expensive.

>               if ((resolved =
>                    resolve_portid_str(ibd_ca, ibd_ca_port, &portid, dr_path,
>                                       IB_DEST_DRPATH, NULL, ibmad_port)) < 0) 
> {
> @@ -947,6 +1014,13 @@ int main(int argc, char **argv)
>                       IBWARN("Failed to resolve %s;",port_guid_str);
>                       goto close_port;
>               }
> +             lid2sl_table[portid.lid] = portid.sl;
> +     } else {

This else causes a compile error:

src/ibqueryerrors.c: In function ‘main’:
src/ibqueryerrors.c:1041: error: ‘else’ without a previous ‘if’


> +             if (resolve_self(ibd_ca, ibd_ca_port, &portid, &port, 0) < 0) {
> +                     IBERROR("can't resolve self port %s", argv[0]);
> +                     goto close_port;
> +             }
> +             self_lid = portid.lid;

I believe calling this here will break DR mode, since the NodeInfo query used
in DR mode uses portid.

>       }
>  
>       if (load_cache_file) {
> @@ -996,12 +1070,17 @@ int main(int argc, char **argv)
>  
>               port = ibnd_find_port_guid(fabric, port_guid);
>               if (port) {
> +                     if(path_record_query(self_lid,port->base_lid))
> +                             goto close_port;

This should be "goto destroy_fabric", since the fabric object has already been
created at this point.

>                       print_node(port->node, NULL);
>               } else
>                       fprintf(stderr, "Failed to find node: %s\n", dr_path);
> -     } else
> +     } else {
> +             if(half_world_query)
> +                     if(path_record_query(self_lid,0))
> +                             goto close_port;

This should be "goto destroy_fabric", since the fabric object has already been
created at this point.

>               ibnd_iter_nodes(fabric, print_node, NULL);
> -
> +     }
>       rc = print_summary();
>       if (rc)
>               rc = 1;
> -- 
> 1.7.1
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


-- 
Ira Weiny
Member of Technical Staff
Lawrence Livermore National Lab
925-423-8008
[email protected]
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to