On Fri, May 03, 2013 at 07:34:15AM -0400, Neil Horman wrote:
> This warning was reported recently:
> 
> WARNING: at drivers/scsi/libfc/fc_exch.c:478 fc_seq_send+0x14f/0x160 [libfc]()
> (Not tainted)
> Hardware name: ProLiant DL120 G7
> Modules linked in: tcm_fc target_core_iblock target_core_file 
> target_core_pscsi
> target_core_mod configfs dm_round_robin dm_multipath 8021q garp stp llc bnx2fc
> cnic uio fcoe libfcoe libfc scsi_transport_fc scsi_tgt autofs4 sunrpc
> pcc_cpufreq ipv6 hpilo hpwdt e1000e microcode iTCO_wdt iTCO_vendor_support
> serio_raw shpchp ixgbe dca mdio sg ext4 mbcache jbd2 sd_mod crc_t10dif 
> pata_acpi
> ata_generic ata_piix hpsa dm_mirror dm_region_hash dm_log dm_mod [last 
> unloaded:
> scsi_wait_scan]
> Pid: 5464, comm: target_completi Not tainted 2.6.32-272.el6.x86_64 #1
> Call Trace:
>  [<ffffffff8106b747>] ? warn_slowpath_common+0x87/0xc0
>  [<ffffffff8106b79a>] ? warn_slowpath_null+0x1a/0x20
>  [<ffffffffa025f7df>] ? fc_seq_send+0x14f/0x160 [libfc]
>  [<ffffffffa035cbce>] ? ft_queue_status+0x16e/0x210 [tcm_fc]
>  [<ffffffffa030a660>] ? target_complete_ok_work+0x0/0x4b0 [target_core_mod]
>  [<ffffffffa030a766>] ? target_complete_ok_work+0x106/0x4b0 [target_core_mod]
>  [<ffffffffa030a660>] ? target_complete_ok_work+0x0/0x4b0 [target_core_mod]
>  [<ffffffff8108c760>] ? worker_thread+0x170/0x2a0
>  [<ffffffff810920d0>] ? autoremove_wake_function+0x0/0x40
>  [<ffffffff8108c5f0>] ? worker_thread+0x0/0x2a0
>  [<ffffffff81091d66>] ? kthread+0x96/0xa0
>  [<ffffffff8100c14a>] ? child_rip+0xa/0x20
>  [<ffffffff81091cd0>] ? kthread+0x0/0xa0
>  [<ffffffff8100c140>] ? child_rip+0x0/0x20
> 
> It occurs because fc_seq_send can have multiple contexts executing within it 
> at
> the same time, and fc_seq_send doesn't consistently use the ep->ex_lock that
> protects this structure.  Because of that, its possible for one context to 
> clear
> the INIT bit in the ep->esb_state field while another checks it, leading to 
> the
> above stack trace generated by the WARN_ON in the function.
> 
> We should probably undertake the effort to convert access to the fc_exch
> structures to use rcu, but that a larger work item.  To just fix this specific
> issue, we can just extend the ex_lock protection through the entire 
> fc_seq_send
> path
> 
> Signed-off-by: Neil Horman <[email protected]>
> Reported-by: Gris Ge <[email protected]>
> CC: Robert Love <[email protected]>
> ---
>  drivers/scsi/libfc/fc_exch.c | 37 ++++++++++++++++++++++++-------------
>  1 file changed, 24 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
> index c772d8d..8b928c6 100644
> --- a/drivers/scsi/libfc/fc_exch.c
> +++ b/drivers/scsi/libfc/fc_exch.c
> @@ -463,13 +463,7 @@ static void fc_exch_delete(struct fc_exch *ep)
>       fc_exch_release(ep);    /* drop hold for exch in mp */
>  }
>  
> -/**
> - * fc_seq_send() - Send a frame using existing sequence/exchange pair
> - * @lport: The local port that the exchange will be sent on
> - * @sp:         The sequence to be sent
> - * @fp:         The frame to be sent on the exchange
> - */
> -static int fc_seq_send(struct fc_lport *lport, struct fc_seq *sp,
> +static int fc_seq_send_locked(struct fc_lport *lport, struct fc_seq *sp,
>                      struct fc_frame *fp)
>  {
>       struct fc_exch *ep;
> @@ -479,7 +473,7 @@ static int fc_seq_send(struct fc_lport *lport, struct 
> fc_seq *sp,
>       u8 fh_type = fh->fh_type;
>  
>       ep = fc_seq_exch(sp);
> -     WARN_ON((ep->esb_stat & ESB_ST_SEQ_INIT) != ESB_ST_SEQ_INIT);
> +     WARN_ON(!(ep->esb_stat & ESB_ST_SEQ_INIT));
>  
>       f_ctl = ntoh24(fh->fh_f_ctl);
>       fc_exch_setup_hdr(ep, fp, f_ctl);
> @@ -502,17 +496,34 @@ static int fc_seq_send(struct fc_lport *lport, struct 
> fc_seq *sp,
>       error = lport->tt.frame_send(lport, fp);
>  
>       if (fh_type == FC_TYPE_BLS)
> -             return error;
> +             goto out;
>  
>       /*
>        * Update the exchange and sequence flags,
>        * assuming all frames for the sequence have been sent.
>        * We can only be called to send once for each sequence.
>        */
> -     spin_lock_bh(&ep->ex_lock);
>       ep->f_ctl = f_ctl & ~FC_FC_FIRST_SEQ;   /* not first seq */
>       if (f_ctl & FC_FC_SEQ_INIT)
>               ep->esb_stat &= ~ESB_ST_SEQ_INIT;
> +out:
> +     return error;
> +}
> +
> +/**
> + * fc_seq_send() - Send a frame using existing sequence/exchange pair
> + * @lport: The local port that the exchange will be sent on
> + * @sp:         The sequence to be sent
> + * @fp:         The frame to be sent on the exchange
> + */
> +static int fc_seq_send(struct fc_lport *lport, struct fc_seq *sp,
> +                    struct fc_frame *fp)
> +{
> +     struct fc_exch *ep;
> +     int error;
> +     ep = fc_seq_exch(sp);
> +     spin_lock_bh(&ep->ex_lock);
> +     error = fc_seq_send_locked(lport, sp, fp);
>       spin_unlock_bh(&ep->ex_lock);
>       return error;
>  }
> @@ -629,7 +640,7 @@ static int fc_exch_abort_locked(struct fc_exch *ep,
>       if (fp) {
>               fc_fill_fc_hdr(fp, FC_RCTL_BA_ABTS, ep->did, ep->sid,
>                              FC_TYPE_BLS, FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
> -             error = fc_seq_send(ep->lp, sp, fp);
> +             error = fc_seq_send_locked(ep->lp, sp, fp);
>       } else
>               error = -ENOBUFS;
>       return error;
> @@ -1132,7 +1143,7 @@ static void fc_seq_send_last(struct fc_seq *sp, struct 
> fc_frame *fp,
>       f_ctl = FC_FC_LAST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT;
>       f_ctl |= ep->f_ctl;
>       fc_fill_fc_hdr(fp, rctl, ep->did, ep->sid, fh_type, f_ctl, 0);
> -     fc_seq_send(ep->lp, sp, fp);
> +     fc_seq_send_locked(ep->lp, sp, fp);
>  }
>  
>  /**
> @@ -1307,8 +1318,8 @@ static void fc_exch_recv_abts(struct fc_exch *ep, 
> struct fc_frame *rx_fp)
>               ap->ba_low_seq_cnt = htons(sp->cnt);
>       }
>       sp = fc_seq_start_next_locked(sp);
> -     spin_unlock_bh(&ep->ex_lock);
>       fc_seq_send_last(sp, fp, FC_RCTL_BA_ACC, FC_TYPE_BLS);
> +     spin_unlock_bh(&ep->ex_lock);
>       fc_frame_free(rx_fp);
>       return;
>  
> -- 
> 1.8.1.4
> 
> 

Ping, any movement on this?  This has been an observed bug several times now.

Thanks!
Neil

_______________________________________________
fcoe-devel mailing list
[email protected]
http://lists.open-fcoe.org/mailman/listinfo/fcoe-devel

Reply via email to