On Tue, 26 Feb 2013 14:58:01 -0500 Hal Rosenstock <[email protected]> wrote:
> On 2/26/2013 2:34 PM, Ira Weiny wrote: > > On Tue, 26 Feb 2013 10:34:56 -0500 > > Hal Rosenstock <[email protected]> wrote: > > > >> On 2/21/2013 4:33 PM, Ira Weiny wrote: > >>> > >>> > >>> Signed-off-by: Ira Weiny <[email protected]> > >>> --- > >>> opensm/osm_perfmgr.c | 200 > >>> ++++++++++++++++++++++++-------------------------- > >>> 1 files changed, 96 insertions(+), 104 deletions(-) > >>> > >>> diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c > >>> index c71111f..69b3d77 100644 > >>> --- a/opensm/osm_perfmgr.c > >>> +++ b/opensm/osm_perfmgr.c > >>> @@ -1263,6 +1263,96 @@ Exit: > >>> return pkey_ix; > >>> } > >>> > >>> +static void handle_redirect(osm_perfmgr_t *pm, > >>> + ib_class_port_info_t *cpi, > >>> + monitored_node_t *p_mon_node, > >>> + uint8_t port, > >>> + osm_madw_context_t *mad_context) > >>> +{ > >>> + char gid_str[INET6_ADDRSTRLEN]; > >>> + ib_api_status_t status; > >>> + boolean_t valid = TRUE; > >>> + int16_t pkey_ix = 0; > >>> + > >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, > >>> + "Redirection to LID %u GID %s QP 0x%x received\n", > >>> + cl_ntoh16(cpi->redir_lid), > >>> + inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str, > >>> + sizeof gid_str), cl_ntoh32(cpi->redir_qp)); > >>> + > >>> + if (!pm->subn->opt.perfmgr_redir) { > >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, > >>> + "Redirection requested but disabled\n"); > >>> + valid = FALSE; > >>> + } > >>> + > >>> + /* valid redirection ? */ > >>> + if (cpi->redir_lid == 0) { > >>> + if (!ib_gid_is_notzero(&cpi->redir_gid)) { > >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, > >>> + "Invalid redirection " > >>> + "(both redirect LID and GID are zero)\n"); > >>> + valid = FALSE; > >>> + } > >>> + } > >>> + if (cpi->redir_qp == 0) { > >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQP\n"); > >>> + valid = FALSE; > >>> + } > >>> + if (cpi->redir_pkey == 0) { > >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectP_Key\n"); > >>> + valid = FALSE; > >>> + } > >>> + if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) { > > > > <comment on my own patch> > > This was a transposition of code that already exists and is being moved > into this routine so the comment goes back to that. > > Also, this would be separate patch if it's the right thing to do. > > > Actually I think this should only be checked if the redir_qp == 1, right? > > I'm not sure. ClassPortInfo.RedirectQ_Key on p.747 says "The Q_Key > associated with the RedirectQP. This Q_Key shall be set to the > well known Q_Key". That seems limiting but for now we should stick with the spec. So forget what I said. > > > > >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQ_Key\n"); > >>> + valid = FALSE; > >>> + } > >>> + > >>> + pkey_ix = validate_redir_pkey(pm, cpi->redir_pkey); > >>> + if (pkey_ix == -1) { > >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, > >>> + "Index for Pkey 0x%x not found\n", > >>> + cl_ntoh16(cpi->redir_pkey)); > >>> + valid = FALSE; > >>> + } > >>> + > >>> + if (cpi->redir_lid == 0) { > >>> + /* GID redirection: get PathRecord information */ > >>> + OSM_LOG(pm->log, OSM_LOG_VERBOSE, > >>> + "GID redirection not currently supported\n"); > >>> + return; > >>> + } > >>> + > >> > >> Better with: > >> if (!valid) > >> return; > >> here ? > >> > > > > The intention was to mark the port as invalid which would disable polling > > on this port. Upon review, I you are right that this is incorrect > > behaviour. > > > > However, If you continue with the poll on the original port and the > > redirect is not supported then you will end up in a loop getting CPI > > responses without any indication of error... > > That wouldn't be good. Didn't mean to break the logic. I was just trying > to simplify this and maybe went a little too far. > > > I would recommend changing the VERBOSE messages above to ERROR entries. > > This would allow the user to see that a port is returning a redirect which > > OpenSM can't handle. > > The reason it isn't ERROR level is so that the log isn't spammed with > these messages. To see the error, user could turn on VERBOSE for this > one module using PML. Ok, V2 on it's way. Ira > > -- Hal > > > > >>> + /* LID redirection support (easier than GID redirection) */ > >>> + cl_plock_acquire(&pm->osm->lock); > >> > >> Port number is already validated before handle_redirect is called, right ? > > > > Yes. > > > >> > >>> + p_mon_node->port[port].redirection = TRUE; > >>> + p_mon_node->port[port].valid = valid; > >>> + memcpy(&p_mon_node->port[port].gid, &cpi->redir_gid, > >>> + sizeof(ib_gid_t)); > >>> + p_mon_node->port[port].lid = cpi->redir_lid; > >>> + p_mon_node->port[port].qp = cpi->redir_qp; > >>> + p_mon_node->port[port].pkey = cpi->redir_pkey; > >>> + if (pkey_ix != -1) > >>> + p_mon_node->port[port].pkey_ix = pkey_ix; > >> > >> Add: > >> p_mon_node->port[port].cpi_valid = FALSE; > >> here > >> > >>> + cl_plock_release(&pm->osm->lock); > >>> + > >>> + if (valid) { > >> > >> If check for valid above LID redirection comment, don't need if (valid) > >> clause here. > > > > Right. > > > >> > >>> + /* Finally, issue a CPI query to the redirected location */ > >>> + cl_plock_acquire(&pm->osm->lock); > >>> + p_mon_node->port[port].cpi_valid = FALSE; > >>> + cl_plock_release(&pm->osm->lock); > >> > >> Eliminate extra lock/unlock and move cpi_valid above where indicated. > > > > Agreed. > > > > Ira > > > >> > >> -- Hal > >> > >>> + status = perfmgr_send_cpi_mad(pm, cpi->redir_lid, > >>> + cpi->redir_qp, pkey_ix, > >>> + port, mad_context, > >>> + 0); /* FIXME SL != 0 */ > >>> + if (status != IB_SUCCESS) > >>> + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: " > >>> + "Failed to send redirected CPI MAD " > >>> + "for node %s (0x%" PRIx64 ") port %d\n", > >>> + p_mon_node->name, p_mon_node->guid, port); > >>> + } > >>> +} > >>> + > >>> /********************************************************************** > >>> * The dispatcher uses a thread pool which will call this function when > >>> * there is a thread available to process the mad received on the wire. > >>> @@ -1281,8 +1371,6 @@ static void pc_recv_process(void *context, void > >>> *data) > >>> perfmgr_db_data_cnt_reading_t data_reading; > >>> cl_map_item_t *p_node; > >>> monitored_node_t *p_mon_node; > >>> - int16_t pkey_ix = 0; > >>> - boolean_t valid = TRUE; > >>> ib_class_port_info_t *cpi = NULL; > >>> > >>> OSM_LOG_ENTER(pm->log); > >>> @@ -1334,112 +1422,15 @@ static void pc_recv_process(void *context, void > >>> *data) > >>> p_mon_node->port[port].cpi_valid = TRUE; > >>> } > >>> cl_plock_release(&pm->osm->lock); > >>> - } > >>> - > >>> - /* Response could also be redirection (IBM eHCA PMA does this) */ > >>> - if (p_mad->status & IB_MAD_STATUS_REDIRECT) { > >>> - char gid_str[INET6_ADDRSTRLEN]; > >>> - ib_api_status_t status; > >>> > >>> - CL_ASSERT(cpi); /* Redirect should have returned CPI > >>> - (processed in previous block) */ > >>> - > >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, > >>> - "Redirection to LID %u GID %s QP 0x%x received\n", > >>> - cl_ntoh16(cpi->redir_lid), > >>> - inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str, > >>> - sizeof gid_str), cl_ntoh32(cpi->redir_qp)); > >>> + /* Response could also be redirection (IBM eHCA PMA does this) > >>> */ > >>> + if (p_mad->status & IB_MAD_STATUS_REDIRECT) > >>> + handle_redirect(pm, cpi, p_mon_node, port, > >>> + mad_context); > >>> > >>> - if (!pm->subn->opt.perfmgr_redir) { > >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, > >>> - "Redirection requested but disabled\n"); > >>> - valid = FALSE; > >>> - } > >>> - > >>> - /* valid redirection ? */ > >>> - if (cpi->redir_lid == 0) { > >>> - if (!ib_gid_is_notzero(&cpi->redir_gid)) { > >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, > >>> - "Invalid redirection " > >>> - "(both redirect LID and GID are > >>> zero)\n"); > >>> - valid = FALSE; > >>> - } > >>> - } > >>> - if (cpi->redir_qp == 0) { > >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid > >>> RedirectQP\n"); > >>> - valid = FALSE; > >>> - } > >>> - if (cpi->redir_pkey == 0) { > >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid > >>> RedirectP_Key\n"); > >>> - valid = FALSE; > >>> - } > >>> - if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) { > >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid > >>> RedirectQ_Key\n"); > >>> - valid = FALSE; > >>> - } > >>> - > >>> - pkey_ix = validate_redir_pkey(pm, cpi->redir_pkey); > >>> - if (pkey_ix == -1) { > >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, > >>> - "Index for Pkey 0x%x not found\n", > >>> - cl_ntoh16(cpi->redir_pkey)); > >>> - valid = FALSE; > >>> - } > >>> - > >>> - if (cpi->redir_lid == 0) { > >>> - /* GID redirection: get PathRecord information */ > >>> - OSM_LOG(pm->log, OSM_LOG_VERBOSE, > >>> - "GID redirection not currently supported\n"); > >>> - goto Exit; > >>> - } > >>> - > >>> - /* LID redirection support (easier than GID redirection) */ > >>> - cl_plock_acquire(&pm->osm->lock); > >>> - /* Now, validate port number */ > >>> - if (port >= p_mon_node->num_ports) { > >>> - cl_plock_release(&pm->osm->lock); > >>> - OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: " > >>> - "Invalid port num %d for GUID 0x%016" > >>> - PRIx64 " num ports %d\n", port, node_guid, > >>> - p_mon_node->num_ports); > >>> - goto Exit; > >>> - } > >>> - p_mon_node->port[port].redirection = TRUE; > >>> - p_mon_node->port[port].valid = valid; > >>> - memcpy(&p_mon_node->port[port].gid, &cpi->redir_gid, > >>> - sizeof(ib_gid_t)); > >>> - p_mon_node->port[port].lid = cpi->redir_lid; > >>> - p_mon_node->port[port].qp = cpi->redir_qp; > >>> - p_mon_node->port[port].pkey = cpi->redir_pkey; > >>> - if (pkey_ix != -1) > >>> - p_mon_node->port[port].pkey_ix = pkey_ix; > >>> - cl_plock_release(&pm->osm->lock); > >>> - > >>> - if (!valid) > >>> - goto Exit; > >>> - > >>> - /* Finally, issue a CPI query to the redirected location */ > >>> - p_mon_node->port[port].cpi_valid = FALSE; > >>> - status = perfmgr_send_cpi_mad(pm, cpi->redir_lid, > >>> - cpi->redir_qp, pkey_ix, > >>> - port, mad_context, > >>> - 0); /* FIXME SL != 0 */ > >>> - if (status != IB_SUCCESS) > >>> - OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: " > >>> - "Failed to send redirected MAD " > >>> - "with method 0x%x for node %s " > >>> - "(NodeGuid 0x%" PRIx64 ") port %d\n", > >>> - mad_context->perfmgr_context.mad_method, > >>> - p_mon_node->name, node_guid, port); > >>> goto Exit; > >>> } > >>> > >>> - /* ClassPortInfo needed to process optional Redirection > >>> - * now exit normally > >>> - */ > >>> - if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) > >>> - goto Exit; > >>> - > >>> perfmgr_db_fill_err_read(wire_read, &err_reading); > >>> /* FIXME separate query for extended counters if they are supported > >>> * on the port. > >>> @@ -1465,7 +1456,8 @@ static void pc_recv_process(void *context, void > >>> *data) > >>> perfmgr_db_clear_prev_dc(pm->db, node_guid, port); > >>> } > >>> > >>> - perfmgr_check_overflow(pm, p_mon_node, pkey_ix, port, wire_read); > >>> + perfmgr_check_overflow(pm, p_mon_node, p_mon_node->port[port].pkey_ix, > >>> + port, wire_read); > >>> > >>> #ifdef ENABLE_OSM_PERF_MGR_PROFILE > >>> do { > >> > >> -- > >> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > >> the body of a message to [email protected] > >> More majordomo info at http://vger.kernel.org/majordomo-info.html > > > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to [email protected] > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Ira Weiny Member of Technical Staff Lawrence Livermore National Lab 925-423-8008 [email protected] -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
