Hi Sasha,

On Thu, Oct 29, 2009 at 9:57 PM, Sasha Khapyorsky <[email protected]> wrote:
> Hi Hal,
>
> On 18:27 Wed 05 Aug     , Hal Rosenstock wrote:
>>
>> NodeDescription changed trap only needs to query the new NodeDescription
>> and not cause sweep
>>
>> Similarly for SystemImageGUID changed (trap 145)
>>
>> LinkWidth/SpeedEnabled changed traps (at least right now) and
>> SM priority changed traps do need to sweep.
>>
>> In the future, LinkWidth/SpeedEnabled changed trap handling
>> can query PortInfo (may also need to bounce port too).
>>
>> Also, as noted in related email thread, it's unclear what
>> sweeping on non generic traps accomplishes but this behavior
>> is preserved.
>>
>> Signed-off-by: Hal Rosenstock <[email protected]>
>> ---
>> diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c
>> index d2e4202..e5bd529 100644
>> --- a/opensm/opensm/osm_trap_rcv.c
>> +++ b/opensm/opensm/osm_trap_rcv.c
>> @@ -291,8 +291,9 @@ trap_rcv_process_request(IN osm_sm_t * sm,
>>       osm_physp_t *p_physp;
>>       cl_ptr_vector_t *p_tbl;
>>       osm_port_t *p_port;
>> +     osm_node_t *p_node;
>>       ib_net16_t source_lid = 0;
>> -     boolean_t is_gsi = TRUE;
>> +     boolean_t is_gsi = TRUE, is_trap144_sweep = FALSE;
>
> What about reusing already existing 'run_heavy_sweep' variable and not
> introduce new one?

Sure.

>>       uint8_t port_num = 0;
>>       boolean_t physp_change_trap = FALSE;
>>       uint64_t event_wheel_timeout = OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT;
>> @@ -547,44 +548,59 @@ trap_rcv_process_request(IN osm_sm_t * sm,
>>               }
>>       }
>
> In general: function trap_rcv_process_request() already has 400 lines of
> code and adding there more lines without simplifying flows makes me
> nervous.

Are you going to block this change until such restructure ?

>> -     /* Check for node description update. IB Spec v1.2.1 pg 823 */
>> -     if (ib_notice_is_generic(p_ntci) &&
>> -         cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 144 &&
>> -         p_ntci->data_details.ntc_144.local_changes & 
>> TRAP_144_MASK_OTHER_LOCAL_CHANGES &&
>> -         p_ntci->data_details.ntc_144.change_flgs & 
>> TRAP_144_MASK_NODE_DESCRIPTION_CHANGE) {
>> -             OSM_LOG(sm->p_log, OSM_LOG_INFO, "Trap 144 Node description 
>> update\n");
>> -
>> -             if (p_physp) {
>> -                     CL_PLOCK_ACQUIRE(sm->p_lock);
>> -                     osm_req_get_node_desc(sm, p_physp);
>> -                     CL_PLOCK_RELEASE(sm->p_lock);
>> -             } else
>> -                     OSM_LOG(sm->p_log, OSM_LOG_ERROR,
>> -                             "ERR 3812: No physical port found for "
>> -                             "trap 144: \"node description update\"\n");
>> -     }
>> +     if (ib_notice_is_generic(p_ntci)) {
>> +             /* Check for node description update. IB Spec v1.2.1 pg 823 */
>> +             if (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 144) {
>> +                     /* update port's capability mask (in PortInfo) */
>> +                     p_physp->port_info.capability_mask = 
>> p_ntci->data_details.ntc_144.new_cap_mask;
>
> Hmm, could this introduce some issues?

Like what ?

>> +                     if (p_ntci->data_details.ntc_144.local_changes & 
>> TRAP_144_MASK_OTHER_LOCAL_CHANGES) {
>> +                             if (p_ntci->data_details.ntc_144.change_flgs & 
>> TRAP_144_MASK_NODE_DESCRIPTION_CHANGE) {
>
>        if ((p_ntci->data_details.ntc_144.local_changes &
>             TRAP_144_MASK_OTHER_LOCAL_CHANGES) &&
>            (p_ntci->data_details.ntc_144.change_flgs &
>             TRAP_144_MASK_NODE_DESCRIPTION_CHANGE)) {
>
>> +                                     OSM_LOG(sm->p_log, OSM_LOG_INFO,
>> +                                             "Trap 144 Node description 
>> update\n");
>> +
>> +                                     if (p_physp) {
>> +                                             CL_PLOCK_ACQUIRE(sm->p_lock);
>> +                                             osm_req_get_node_desc(sm, 
>> p_physp);
>> +                                             CL_PLOCK_RELEASE(sm->p_lock);
>> +                                     } else
>> +                                             OSM_LOG(sm->p_log, 
>> OSM_LOG_ERROR,
>> +                                                     "ERR 3812: No physical 
>> port found for "
>> +                                                     "trap 144: \"node 
>> description update\"\n");
>> +                             }
>> +                     }
>> +                     if (p_ntci->data_details.ntc_144.change_flgs & 
>> TRAP_144_MASK_LINK_WIDTH_ENABLE_CHANGE ||
>> +                         p_ntci->data_details.ntc_144.change_flgs & 
>> TRAP_144_MASK_LINK_SPEED_ENABLE_CHANGE ||
>> +                         p_ntci->data_details.ntc_144.change_flgs & 
>> TRAP_144_MASK_SM_PRIORITY_CHANGE)
>
>        if ((p_ntci->data_details.ntc_144.change_flgs &
>             (TRAP_144_MASK_LINK_WIDTH_ENABLE_CHANGE |
>              TRAP_144_MASK_LINK_SPEED_ENABLE_CHANGE |
>              TRAP_144_MASK_SM_PRIORITY_CHANGE)))
>
>
>> +                             is_trap144_sweep = TRUE;
>> +             }
>>
>> -     /* do a sweep if we received a trap */
>> -     if (sm->p_subn->opt.sweep_on_trap) {
>> -             /* if this is trap number 128 or run_heavy_sweep is TRUE -
>> -                update the force_heavy_sweep flag of the subnet.
>> -                Sweep also on traps 144/145 - these traps signal a change of
>> -                certain port capabilities/system image guid.
>> -                TODO: In the future this can be changed to just getting
>> -                PortInfo on this port instead of sweeping the entire 
>> subnet. */
>> -             if (ib_notice_is_generic(p_ntci) &&
>> -                 (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 128 ||
>> -                  cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 144 ||
>> -                  cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 145 ||
>> -                  run_heavy_sweep)) {
>> -                     OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
>> -                             "Forcing heavy sweep. Received trap:%u\n",
>> -                             cl_ntoh16(p_ntci->g_or_v.generic.trap_num));
>> +             if (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 145) {
>> +                     /* update system image guid (in NodeInfo) */
>> +                     p_node = osm_physp_get_node_ptr(p_physp);
>> +                     if (p_node)
>
> When 'p_node' could be NULL?

It's just a possible return from osm_physp_get_node_ptr and wanted to
be sure this was not the case before setting node_info. Should this be
eliminated ?

>> +                             p_node->node_info.node_guid = 
>> p_ntci->data_details.ntc_145.new_sys_guid;
>
> Didn't you mean 'node_info.sys_guid' and 'node_info.node_guid'?

Just sys_guid.

>> +             }
>> +
>> +             /* do a sweep if we received a trap */
>> +             if (sm->p_subn->opt.sweep_on_trap) {
>> +                     /* if this is trap number 128 or run_heavy_sweep is
>> +                        TRUE - update the force_heavy_sweep flag of the
>> +                        subnet. Also, sweep on certain types of trap 144.
>> +                        TODO: In the future this can be changed to just
>> +                        getting PortInfo on this port instead of sweeping
>> +                        the entire subnet. */
>> +                     if (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 128 
>> ||
>> +                         is_trap144_sweep || run_heavy_sweep) {
>
> It looks that prevents sweeping on PortInfo:CapMask change which
> is effectively breaks at least SM handover - new SM will be undetected.

Yes, CapMask change should be included along with the other local
changes to cause sweep.

-- Hal

>> +                             OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
>> +                                     "Forcing heavy sweep. Received 
>> trap:%u\n",
>> +                                     
>> cl_ntoh16(p_ntci->g_or_v.generic.trap_num));
>>
>> -                     sm->p_subn->force_heavy_sweep = TRUE;
>> +                             sm->p_subn->force_heavy_sweep = TRUE;
>> +                     }
>> +                     osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
>>               }
>> +     } else if (sm->p_subn->opt.sweep_on_trap)
>>               osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
>> -     }
>>
>>       /* If we reached here due to trap 129/130/131 - do not need to do
>>          the notice report. Just goto exit. We know this is the case
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
N�����r��y����b�X��ǧv�^�)޺{.n�+����{��ٚ�{ay�ʇڙ�,j��f���h���z��w���
���j:+v���w�j�m��������zZ+�����ݢj"��!�i

Reply via email to