> -----Original Message-----
> From: Neil Horman [mailto:[email protected]]
> Sent: Friday, May 10, 2013 6:09 AM
> To: [email protected]
> Cc: Love, Robert W
> Subject: Re: [PATCH] libfc: extend ex_lock to protect all of fc_seq_send
> 
> On Fri, May 03, 2013 at 07:34:15AM -0400, Neil Horman wrote:
> > This warning was reported recently:
> >
> > WARNING: at drivers/scsi/libfc/fc_exch.c:478 fc_seq_send+0x14f/0x160
> > [libfc]() (Not tainted) Hardware name: ProLiant DL120 G7 Modules
> > linked in: tcm_fc target_core_iblock target_core_file
> > target_core_pscsi target_core_mod configfs dm_round_robin
> dm_multipath
> > 8021q garp stp llc bnx2fc cnic uio fcoe libfcoe libfc
> > scsi_transport_fc scsi_tgt autofs4 sunrpc pcc_cpufreq ipv6 hpilo hpwdt
> > e1000e microcode iTCO_wdt iTCO_vendor_support serio_raw shpchp
> ixgbe
> > dca mdio sg ext4 mbcache jbd2 sd_mod crc_t10dif pata_acpi ata_generic
> ata_piix hpsa dm_mirror dm_region_hash dm_log dm_mod [last unloaded:
> > scsi_wait_scan]
> > Pid: 5464, comm: target_completi Not tainted 2.6.32-272.el6.x86_64 #1
> > Call Trace:
> >  [<ffffffff8106b747>] ? warn_slowpath_common+0x87/0xc0
> > [<ffffffff8106b79a>] ? warn_slowpath_null+0x1a/0x20
> > [<ffffffffa025f7df>] ? fc_seq_send+0x14f/0x160 [libfc]
> > [<ffffffffa035cbce>] ? ft_queue_status+0x16e/0x210 [tcm_fc]
> > [<ffffffffa030a660>] ? target_complete_ok_work+0x0/0x4b0
> > [target_core_mod]  [<ffffffffa030a766>] ?
> > target_complete_ok_work+0x106/0x4b0 [target_core_mod]
> > [<ffffffffa030a660>] ? target_complete_ok_work+0x0/0x4b0
> > [target_core_mod]  [<ffffffff8108c760>] ? worker_thread+0x170/0x2a0
> > [<ffffffff810920d0>] ? autoremove_wake_function+0x0/0x40
> > [<ffffffff8108c5f0>] ? worker_thread+0x0/0x2a0  [<ffffffff81091d66>] ?
> > kthread+0x96/0xa0  [<ffffffff8100c14a>] ? child_rip+0xa/0x20
> > [<ffffffff81091cd0>] ? kthread+0x0/0xa0  [<ffffffff8100c140>] ?
> > child_rip+0x0/0x20
> >
> > It occurs because fc_seq_send can have multiple contexts executing
> > within it at the same time, and fc_seq_send doesn't consistently use
> > the ep->ex_lock that protects this structure.  Because of that, its
> > possible for one context to clear the INIT bit in the ep->esb_state
> > field while another checks it, leading to the above stack trace generated by
> the WARN_ON in the function.
> >
> > We should probably undertake the effort to convert access to the
> > fc_exch structures to use rcu, but that a larger work item.  To just
> > fix this specific issue, we can just extend the ex_lock protection
> > through the entire fc_seq_send path
> >
> > Signed-off-by: Neil Horman <[email protected]>
> > Reported-by: Gris Ge <[email protected]>
> > CC: Robert Love <[email protected]>
> > ---
> >  drivers/scsi/libfc/fc_exch.c | 37
> > ++++++++++++++++++++++++-------------
> >  1 file changed, 24 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/scsi/libfc/fc_exch.c
> > b/drivers/scsi/libfc/fc_exch.c index c772d8d..8b928c6 100644
> > --- a/drivers/scsi/libfc/fc_exch.c
> > +++ b/drivers/scsi/libfc/fc_exch.c
> > @@ -463,13 +463,7 @@ static void fc_exch_delete(struct fc_exch *ep)
> >     fc_exch_release(ep);    /* drop hold for exch in mp */
> >  }
> >
> > -/**
> > - * fc_seq_send() - Send a frame using existing sequence/exchange pair
> > - * @lport: The local port that the exchange will be sent on
> > - * @sp:       The sequence to be sent
> > - * @fp:       The frame to be sent on the exchange
> > - */
> > -static int fc_seq_send(struct fc_lport *lport, struct fc_seq *sp,
> > +static int fc_seq_send_locked(struct fc_lport *lport, struct fc_seq
> > +*sp,
> >                    struct fc_frame *fp)
> >  {
> >     struct fc_exch *ep;
> > @@ -479,7 +473,7 @@ static int fc_seq_send(struct fc_lport *lport, struct
> fc_seq *sp,
> >     u8 fh_type = fh->fh_type;
> >
> >     ep = fc_seq_exch(sp);
> > -   WARN_ON((ep->esb_stat & ESB_ST_SEQ_INIT) !=
> ESB_ST_SEQ_INIT);
> > +   WARN_ON(!(ep->esb_stat & ESB_ST_SEQ_INIT));
> >
> >     f_ctl = ntoh24(fh->fh_f_ctl);
> >     fc_exch_setup_hdr(ep, fp, f_ctl);
> > @@ -502,17 +496,34 @@ static int fc_seq_send(struct fc_lport *lport,
> struct fc_seq *sp,
> >     error = lport->tt.frame_send(lport, fp);
> >
> >     if (fh_type == FC_TYPE_BLS)
> > -           return error;
> > +           goto out;
> >
> >     /*
> >      * Update the exchange and sequence flags,
> >      * assuming all frames for the sequence have been sent.
> >      * We can only be called to send once for each sequence.
> >      */
> > -   spin_lock_bh(&ep->ex_lock);
> >     ep->f_ctl = f_ctl & ~FC_FC_FIRST_SEQ;   /* not first seq */
> >     if (f_ctl & FC_FC_SEQ_INIT)
> >             ep->esb_stat &= ~ESB_ST_SEQ_INIT;
> > +out:
> > +   return error;
> > +}
> > +
> > +/**
> > + * fc_seq_send() - Send a frame using existing sequence/exchange pair
> > + * @lport: The local port that the exchange will be sent on
> > + * @sp:       The sequence to be sent
> > + * @fp:       The frame to be sent on the exchange
> > + */
> > +static int fc_seq_send(struct fc_lport *lport, struct fc_seq *sp,
> > +                  struct fc_frame *fp)
> > +{
> > +   struct fc_exch *ep;
> > +   int error;
> > +   ep = fc_seq_exch(sp);
> > +   spin_lock_bh(&ep->ex_lock);
> > +   error = fc_seq_send_locked(lport, sp, fp);
> >     spin_unlock_bh(&ep->ex_lock);
> >     return error;
> >  }
> > @@ -629,7 +640,7 @@ static int fc_exch_abort_locked(struct fc_exch *ep,
> >     if (fp) {
> >             fc_fill_fc_hdr(fp, FC_RCTL_BA_ABTS, ep->did, ep->sid,
> >                            FC_TYPE_BLS, FC_FC_END_SEQ |
> FC_FC_SEQ_INIT, 0);
> > -           error = fc_seq_send(ep->lp, sp, fp);
> > +           error = fc_seq_send_locked(ep->lp, sp, fp);
> >     } else
> >             error = -ENOBUFS;
> >     return error;
> > @@ -1132,7 +1143,7 @@ static void fc_seq_send_last(struct fc_seq *sp,
> struct fc_frame *fp,
> >     f_ctl = FC_FC_LAST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT;
> >     f_ctl |= ep->f_ctl;
> >     fc_fill_fc_hdr(fp, rctl, ep->did, ep->sid, fh_type, f_ctl, 0);
> > -   fc_seq_send(ep->lp, sp, fp);
> > +   fc_seq_send_locked(ep->lp, sp, fp);
> >  }
> >
> >  /**
> > @@ -1307,8 +1318,8 @@ static void fc_exch_recv_abts(struct fc_exch *ep,
> struct fc_frame *rx_fp)
> >             ap->ba_low_seq_cnt = htons(sp->cnt);
> >     }
> >     sp = fc_seq_start_next_locked(sp);
> > -   spin_unlock_bh(&ep->ex_lock);
> >     fc_seq_send_last(sp, fp, FC_RCTL_BA_ACC, FC_TYPE_BLS);
> > +   spin_unlock_bh(&ep->ex_lock);
> >     fc_frame_free(rx_fp);
> >     return;
> >
> > --
> > 1.8.1.4
> >
> >
> 
> Ping, any movement on this?  This has been an observed bug several times
> now.
> 

Applied. I had some test environment problems to work though. Thanks, //Rob
_______________________________________________
fcoe-devel mailing list
[email protected]
http://lists.open-fcoe.org/mailman/listinfo/fcoe-devel

Reply via email to