On Tue, 26 Feb 2013 14:48:26 -0500 Hal Rosenstock <[email protected]> wrote:
> On 2/26/2013 1:58 PM, Ira Weiny wrote: > > On Tue, 26 Feb 2013 10:03:35 -0500 > > Hal Rosenstock <[email protected]> wrote: > > > >> On 2/21/2013 4:33 PM, Ira Weiny wrote: > >>> [snip] > >>> +/********************************************************************** > >>> * query the Port Counters of all the nodes in the subnet. > >>> **********************************************************************/ > >>> static void perfmgr_query_counters(cl_map_item_t * p_map_item, void > >>> *context) > >>> @@ -557,22 +632,42 @@ static void perfmgr_query_counters(cl_map_item_t * > >>> p_map_item, void *context) > >>> mad_context.perfmgr_context.node_guid = node_guid; > >>> mad_context.perfmgr_context.port = port; > >>> mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET; > >>> + > >>> + if (!mon_node->port[port].cpi_valid) { > >>> + status = perfmgr_send_cpi_mad(pm, lid, remote_qp, > >>> + mon_node->port[port].pkey_ix, > >>> + port, &mad_context, > >>> + 0); /* FIXME SL != 0 */ > >>> + if (status != IB_SUCCESS) > >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: " > >>> + "Failed to issue ClassPortInfo query " > >>> + "for node 0x%" PRIx64 > >>> + " port %d (%s)\n", > >>> + node->node_info.node_guid, port, > >>> + node->print_desc); > >>> + if (mon_node->node_type == IB_NODE_TYPE_SWITCH) > >>> + goto Exit; /* only need to issue 1 CPI query > >>> + for switches */ > >> > >> Have you tried switches with base SP0 ? > > > > Yes. Why? I admit I may be confused about when SP0 is different from > > physical ports. > > The reason I asked here is that, with BSP0, the ClassPortInfo is being > requested on port 1, not 0. > > I'm not sure what you're referring to in terms of SP0 being different > from physical ports so I'll elaborate on what I think you may be asking: > > In general, BSP0 is different than ESP0 in terms of PMA attributes in > that it is excluded in PortSelect. 
PortSelect says "However, 0 is > only valid for the enhanced switch management port; it is > ignored for the base switch management port." Right, BSP0 doesn't have PM counters. So why did you ask if I had tried this with base SP0? To be clear I have tried this with both Enhanced SP0 and Base SP0 and it seems to work correctly. Ira > > SP0 is different from physical ports in that it's a virtual IB port > (usually across CPU link or the like) and doesn't have a remote peer. > > >> > >>> + } else { > >>> + > >>> #ifdef ENABLE_OSM_PERF_MGR_PROFILE > >>> - gettimeofday(&mad_context.perfmgr_context.query_start, NULL); > >>> + gettimeofday(&mad_context.perfmgr_context.query_start, > >>> NULL); > >>> #endif > >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" > >>> - PRIx64 " port %d (lid %u) (%s)\n", node_guid, port, > >>> - cl_ntoh16(lid), node->print_desc); > >>> - status = perfmgr_send_pc_mad(pm, lid, remote_qp, > >>> - mon_node->port[port].pkey_ix, > >>> - port, IB_MAD_METHOD_GET, > >>> - &mad_context); > >>> - if (status != IB_SUCCESS) > >>> - OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " > >>> - "Failed to issue port counter query for node > >>> 0x%" > >>> - PRIx64 " port %d (%s)\n", > >>> - node->node_info.node_guid, port, > >>> - node->print_desc); > >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for > >>> node 0x%" > >>> + PRIx64 " port %d (lid %u) (%s)\n", node_guid, > >>> port, > >>> + cl_ntoh16(lid), node->print_desc); > >>> + status = perfmgr_send_pc_mad(pm, lid, remote_qp, > >>> + > >>> mon_node->port[port].pkey_ix, > >>> + port, IB_MAD_METHOD_GET, > >>> + &mad_context, > >>> + 0); /* FIXME SL != 0 */ > >>> + if (status != IB_SUCCESS) > >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " > >>> + "Failed to issue port counter query for > >>> node 0x%" > >>> + PRIx64 " port %d (%s)\n", > >>> + node->node_info.node_guid, port, > >>> + node->print_desc); > >>> + } > >>> } > >>> Exit: > >>> cl_plock_release(&pm->osm->lock); > >>> @@ 
-1053,7 +1148,8 @@ static void perfmgr_check_overflow(osm_perfmgr_t * > >>> pm, > >>> /* clear port counters */ > >>> status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix, > >>> port, IB_MAD_METHOD_SET, > >>> - &mad_context); > >>> + &mad_context, > >>> + 0); /* FIXME SL != 0 */ > >>> if (status != IB_SUCCESS) > >>> OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: " > >>> "Failed to send clear counters MAD for %s (0x%" > >>> @@ -1187,6 +1283,7 @@ static void pc_recv_process(void *context, void > >>> *data) > >>> monitored_node_t *p_mon_node; > >>> int16_t pkey_ix = 0; > >>> boolean_t valid = TRUE; > >>> + ib_class_port_info_t *cpi = NULL; > >>> > >>> OSM_LOG_ENTER(pm->log); > >>> > >>> @@ -1209,15 +1306,44 @@ static void pc_recv_process(void *context, void > >>> *data) > >>> CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS || > >>> p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO); > >>> > >>> + /* capture CLASS_PORT_INFO data */ > >>> + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { > >>> + cpi = (ib_class_port_info_t *) & > >>> + (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); > >>> + > >>> + cl_plock_acquire(&pm->osm->lock); > >>> + /* validate port number */ > >>> + if (port >= p_mon_node->num_ports) { > >>> + cl_plock_release(&pm->osm->lock); > >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: " > >>> + "Invalid port num %d for GUID 0x%016" > >>> + PRIx64 " num ports %d\n", port, node_guid, > >>> + p_mon_node->num_ports); > >>> + goto Exit; > >>> + } > >>> + if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) { > >>> + int i = 0; > >>> + for (i = p_mon_node->esp0 ? 
0 : 1; > >>> + i < p_mon_node->num_ports; > >>> + i++) { > >>> + p_mon_node->port[i].cap_mask = cpi->cap_mask; > >>> + p_mon_node->port[i].cpi_valid = TRUE; > >>> + } > >>> + } else { > >>> + p_mon_node->port[port].cap_mask = cpi->cap_mask; > >>> + p_mon_node->port[port].cpi_valid = TRUE; > >>> + } > >>> + cl_plock_release(&pm->osm->lock); > >>> + } > >>> + > >>> /* Response could also be redirection (IBM eHCA PMA does this) */ > >>> - if (p_mad->status & IB_MAD_STATUS_REDIRECT && > >>> - p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { > >>> + if (p_mad->status & IB_MAD_STATUS_REDIRECT) { > >> > >> Shouldn't this be part of if (p_mad->attr_id == > >> IB_MAD_ATTR_CLASS_PORT_INFO) clause ? > >> > > > > Yes but I took care of that in the next patch where I cleaned up the code > > and made that entire block a function "handle_redirect". > > I see that now. The semantics here shouldn't have been changed but it's > probably OK that the next patch fixes that again. > > -- Hal > > > Ira > > <snip...> > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to [email protected] > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Ira Weiny Member of Technical Staff Lawrence Livermore National Lab 925-423-8008 [email protected] -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
