On 03/05/2018 06:02 AM, Bill Kuzeja wrote:
> Because of the shifting around of code in qla2x00_probe_one recently,
> failures during adapter initialization can lead to problems, i.e. NULL
> pointer crashes and doubly freed data structures which cause eventual
> panics.
> 
> This V2 version makes the relevant memory free routines idempotent, so
> repeat calls won't cause any harm. I also removed the problematic
> probe_init_failed exit point as it is not needed.
> 
> Fixes: d64d6c5671db ("scsi: qla2xxx: Fix NULL pointer crash due to probe 
> failure")
> Signed-off-by: Bill Kuzeja <william.kuz...@stratus.com>
> 
> ---
> 
> Some of these issues are due to:
> 
> commit d64d6c5671db scsi: qla2xxx: Fix NULL pointer crash due to probe failure
> 
> where some frees were moved around, as well as the error exit from
> a qla2x00_request_irqs failure.
> 
> This was a fix for:
> 
> commit d74595278f4a scsi: qla2xxx: Add multiple queue pair functionality.
> 
> which caused problems of its own.
> 
> To reproduce these issues, I run a test where I break the card early in
> init, (and also through kprobe fault injection). This way, I've been able
> to hit several different types of crashes, all started by failures of
> various routines called throughout the probe.
> 
> The problematic routines that fail and their exit points are:
> 
> qla2x00_alloc_queues  => probe_init_failed
> initialize_adapter    => probe_failed
> kthread_create        => probe_failed
> scsi_add_host         => probe_failed
> 
> Exit points are ordered in this way:
> 
> probe_init_failed:
>         qla2x00_free_req_que(ha, req);
>         ha->req_q_map[0] = NULL;
>         clear_bit(0, ha->req_qid_map);
>         qla2x00_free_rsp_que(ha, rsp);
>         ha->rsp_q_map[0] = NULL;
>         clear_bit(0, ha->rsp_qid_map);
>         ha->max_req_queues = ha->max_rsp_queues = 0;
> 
> probe_failed:
>         if (base_vha->timer_active)
>                 qla2x00_stop_timer(base_vha);
> ...
>         qla2x00_free_device(base_vha);
> 
>         scsi_host_put(base_vha->host);
> 
> probe_hw_failed:
>         qla2x00_mem_free(ha);
>         qla2x00_free_req_que(ha, req);
>         qla2x00_free_rsp_que(ha, rsp);
>         qla2x00_clear_drv_active(ha);
> 
> Note that qla2x00_free_device calls qla2x00_mem_free and
> qla2x00_free_rsp_que and qla2x00_free_req_que. So if we exit to
> probe_failed or probe_init_failed, we'll end up calling these
> routines multiple times.
> 
> These routines are not idempotent, I am making them so. This solves
> most of the issues.
> 
> Also probe_init_failed is not needed. In the place that it is called,
> ha->req_q_map[0] and ha->rsp_q_map[0] are not setup. And we now call
> qla2x00_free_rsp_que/qla2x00_free_req_que below in probe_hw_failed.
> I removed this exit point entirely.
> 
> Along the way I found that the return code for qla2x00_alloc_queues
> never really causes us to exit out of the probe routine.
> 
> In order to fail...
> 
>         if (!qla2x00_alloc_queues(ha, req, rsp)) {
> 
> ...we must return 0. However, internally, if this routine succeeds it
> returns 1 and if it fails it returns -ENOMEM. So I am modifying
> qla2x00_alloc_queues to fall in line with other return conventions
> where zero is a success (and obviously have changed the probe routine
> accordingly).
> 
> One more issue falls out of this case: when qla2x00_abort_all_cmds
> is invoked from qla2x00_free_device and request queues are not
> allocated (qla2x00_alloc_queues has not succeeded), we crash on a NULL
> pointer (ha->req_q_map[que]). So check for this at the start of
> qla2x00_abort_all_cmds and exit accordingly.
> 
> I've tested out these changes thoroughly failing initialization at
> various times. I've also used kprobes to inject errors to force us
> into various error paths.
> ---
>  drivers/scsi/qla2xxx/qla_os.c | 59 
> +++++++++++++++++++++++++++----------------
>  1 file changed, 37 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
> index afcb5567..3860bdfc 100644
> --- a/drivers/scsi/qla2xxx/qla_os.c
> +++ b/drivers/scsi/qla2xxx/qla_os.c
> @@ -454,7 +454,7 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha, 
> struct req_que *req,
>       ha->req_q_map[0] = req;
>       set_bit(0, ha->rsp_qid_map);
>       set_bit(0, ha->req_qid_map);
> -     return 1;
> +     return 0;
>  
>  fail_qpair_map:
>       kfree(ha->base_qpair);
> @@ -471,6 +471,9 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha, 
> struct req_que *req,
>  
>  static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req)
>  {
> +     if (!ha->req_q_map)
> +             return;
> +
>       if (IS_QLAFX00(ha)) {
>               if (req && req->ring_fx00)
>                       dma_free_coherent(&ha->pdev->dev,
> @@ -481,14 +484,17 @@ static void qla2x00_free_req_que(struct qla_hw_data 
> *ha, struct req_que *req)
>               (req->length + 1) * sizeof(request_t),
>               req->ring, req->dma);
>  
> -     if (req)
> +     if (req) {
>               kfree(req->outstanding_cmds);
> -
> -     kfree(req);
> +             kfree(req);
> +     }
>  }
>  
>  static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp)
>  {
> +     if (!ha->rsp_q_map)
> +             return;
> +
>       if (IS_QLAFX00(ha)) {
>               if (rsp && rsp->ring)
>                       dma_free_coherent(&ha->pdev->dev,
> @@ -499,7 +505,8 @@ static void qla2x00_free_rsp_que(struct qla_hw_data *ha, 
> struct rsp_que *rsp)
>               (rsp->length + 1) * sizeof(response_t),
>               rsp->ring, rsp->dma);
>       }
> -     kfree(rsp);
> +     if (rsp)
> +             kfree(rsp);
>  }
>  
>  static void qla2x00_free_queues(struct qla_hw_data *ha)
> @@ -1723,6 +1730,8 @@ uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha)
>       struct qla_tgt_cmd *cmd;
>       uint8_t trace = 0;
>  
> +     if (!ha->req_q_map)
> +             return;
>       spin_lock_irqsave(qp->qp_lock_ptr, flags);
>       req = qp->req;
>       for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
> @@ -3095,14 +3104,14 @@ static void qla2x00_iocb_work_fn(struct work_struct 
> *work)
>       /* Set up the irqs */
>       ret = qla2x00_request_irqs(ha, rsp);
>       if (ret)
> -             goto probe_hw_failed;
> +             goto probe_failed;
>  
>       /* Alloc arrays of request and response ring ptrs */
> -     if (!qla2x00_alloc_queues(ha, req, rsp)) {
> +     if (qla2x00_alloc_queues(ha, req, rsp)) {
>               ql_log(ql_log_fatal, base_vha, 0x003d,
>                   "Failed to allocate memory for queue pointers..."
>                   "aborting.\n");
> -             goto probe_init_failed;
> +             goto probe_failed;
>       }
>  
>       if (ha->mqenable && shost_use_blk_mq(host)) {
> @@ -3387,15 +3396,6 @@ static void qla2x00_iocb_work_fn(struct work_struct 
> *work)
>  
>       return 0;
>  
> -probe_init_failed:
> -     qla2x00_free_req_que(ha, req);
> -     ha->req_q_map[0] = NULL;
> -     clear_bit(0, ha->req_qid_map);
> -     qla2x00_free_rsp_que(ha, rsp);
> -     ha->rsp_q_map[0] = NULL;
> -     clear_bit(0, ha->rsp_qid_map);
> -     ha->max_req_queues = ha->max_rsp_queues = 0;
> -
>  probe_failed:
>       if (base_vha->timer_active)
>               qla2x00_stop_timer(base_vha);
> @@ -4508,11 +4508,17 @@ void qla2x00_mark_device_lost(scsi_qla_host_t *vha, 
> fc_port_t *fcport,
>       if (ha->init_cb)
>               dma_free_coherent(&ha->pdev->dev, ha->init_cb_size,
>                       ha->init_cb, ha->init_cb_dma);
> -     vfree(ha->optrom_buffer);
> -     kfree(ha->nvram);
> -     kfree(ha->npiv_info);
> -     kfree(ha->swl);
> -     kfree(ha->loop_id_map);
> +
> +     if (ha->optrom_buffer)
> +             vfree(ha->optrom_buffer);
> +     if (ha->nvram)
> +             kfree(ha->nvram);
> +     if (ha->npiv_info)
> +             kfree(ha->npiv_info);
> +     if (ha->swl)
> +             kfree(ha->swl);
> +     if (ha->loop_id_map)
> +             kfree(ha->loop_id_map);
>  
>       ha->srb_mempool = NULL;
>       ha->ctx_mempool = NULL;
> @@ -4528,6 +4534,15 @@ void qla2x00_mark_device_lost(scsi_qla_host_t *vha, 
> fc_port_t *fcport,
>       ha->ex_init_cb_dma = 0;
>       ha->async_pd = NULL;
>       ha->async_pd_dma = 0;
> +     ha->loop_id_map = NULL;
> +     ha->npiv_info = NULL;
> +     ha->optrom_buffer = NULL;
> +     ha->swl = NULL;
> +     ha->nvram = NULL;
> +     ha->mctp_dump = NULL;
> +     ha->dcbx_tlv = NULL;
> +     ha->xgmac_data = NULL;
> +     ha->sfp_data = NULL;
>  
>       ha->s_dma_pool = NULL;
>       ha->dl_dma_pool = NULL;
> 
This cries out for a memset()...

Otherwise:

Reviewed-by: Hannes Reinecke <h...@suse.com>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Teamlead Storage & Networking
h...@suse.de                                   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

Reply via email to