On 03/05/2018 06:02 AM, Bill Kuzeja wrote:
> Because of the shifting around of code in qla2x00_probe_one recently,
> failures during adapter initialization can lead to problems, i.e. NULL
> pointer crashes and doubly freed data structures which cause eventual
> panics.
>
> This V2 version makes the relevant memory free routines idempotent, so
> repeat calls won't cause any harm. I also removed the problematic
> probe_init_failed exit point as it is not needed.
>
> Fixes: d64d6c5671db ("scsi: qla2xxx: Fix NULL pointer crash due to probe
> failure")
> Signed-off-by: Bill Kuzeja <[email protected]>
>
> ---
>
> Some of these issues are due to:
>
> commit d64d6c5671db scsi: qla2xxx: Fix NULL pointer crash due to probe failure
>
> where some frees were moved around, as well as the error exit from
> a qla2x00_request_irqs failure.
>
> This was a fix for:
>
> commit d74595278f4a scsi: qla2xxx: Add multiple queue pair functionality.
>
> which caused problems of its own.
>
> To reproduce these issues, I run a test where I break the card early in
> init, (and also through kprobe fault injection). This way, I've been able
> to hit several different types of crashes, all started by failures of
> various routines called throughout the probe.
>
> The problematic routines that fail and their exit points are:
>
> qla2x00_alloc_queues => probe_init_failed
> initialize_adapter => probe_failed
> kthread_create => probe_failed
> scsi_add_host => probe_failed
>
> Exit points are ordered in this way:
>
> probe_init_failed:
> qla2x00_free_req_que(ha, req);
> ha->req_q_map[0] = NULL;
> clear_bit(0, ha->req_qid_map);
> qla2x00_free_rsp_que(ha, rsp);
> ha->rsp_q_map[0] = NULL;
> clear_bit(0, ha->rsp_qid_map);
> ha->max_req_queues = ha->max_rsp_queues = 0;
>
> probe_failed:
> if (base_vha->timer_active)
> qla2x00_stop_timer(base_vha);
> ...
> qla2x00_free_device(base_vha);
>
> scsi_host_put(base_vha->host);
>
> probe_hw_failed:
> qla2x00_mem_free(ha);
> qla2x00_free_req_que(ha, req);
> qla2x00_free_rsp_que(ha, rsp);
> qla2x00_clear_drv_active(ha);
>
> Note that qla2x00_free_device calls qla2x00_mem_free and
> qla2x00_free_rsp_que and qla2x00_free_req_que. So if we exit to
> probe_failed or probe_init_failed, we'll end up calling these
> routines multiple times.
>
> These routines are not idempotent, I am making them so. This solves
> most of the issues.
>
> Also probe_init_failed is not needed. In the place that it is called,
> ha->req_q_map[0] and ha->rsp_q_map[0] are not setup. And we now call
> qla2x00_free_rsp_que/qla2x00_free_req_que below in probe_hw_failed.
> I removed this exit point entirely.
>
> Along the way I found that the return code for qla2x00_alloc_queues
> never really causes us to exit out of the probe routine.
>
> In order to fail...
>
> if (!qla2x00_alloc_queues(ha, req, rsp)) {
>
> ...we must return 0. However, internally, if this routine succeeds it
> returns 1 and if it fails it returns -ENOMEM. So I am modifying
> qla2x00_alloc_queues to fall in line with other return conventions
> where zero is a success (and obviously have changed the probe routine
> accordingly).
>
> One more issue falls out of this case: when qla2x00_abort_all_cmds
> is invoked from qla2x00_free_device and request queues are not
> allocated (qla2x00_alloc_queues has not succeeded), we crash on a NULL
> pointer (ha->req_q_map[que]). So check for this at the start of
> qla2x00_abort_all_cmds and exit accordingly.
>
> I've tested out these changes thoroughly failing initialization at
> various times. I've also used kprobes to inject errors to force us
> into various error paths.
> ---
> drivers/scsi/qla2xxx/qla_os.c | 59
> +++++++++++++++++++++++++++----------------
> 1 file changed, 37 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
> index afcb5567..3860bdfc 100644
> --- a/drivers/scsi/qla2xxx/qla_os.c
> +++ b/drivers/scsi/qla2xxx/qla_os.c
> @@ -454,7 +454,7 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha,
> struct req_que *req,
> ha->req_q_map[0] = req;
> set_bit(0, ha->rsp_qid_map);
> set_bit(0, ha->req_qid_map);
> - return 1;
> + return 0;
>
> fail_qpair_map:
> kfree(ha->base_qpair);
> @@ -471,6 +471,9 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha,
> struct req_que *req,
>
> static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req)
> {
> + if (!ha->req_q_map)
> + return;
> +
> if (IS_QLAFX00(ha)) {
> if (req && req->ring_fx00)
> dma_free_coherent(&ha->pdev->dev,
> @@ -481,14 +484,17 @@ static void qla2x00_free_req_que(struct qla_hw_data
> *ha, struct req_que *req)
> (req->length + 1) * sizeof(request_t),
> req->ring, req->dma);
>
> - if (req)
> + if (req) {
> kfree(req->outstanding_cmds);
> -
> - kfree(req);
> + kfree(req);
> + }
> }
>
> static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp)
> {
> + if (!ha->rsp_q_map)
> + return;
> +
> if (IS_QLAFX00(ha)) {
> if (rsp && rsp->ring)
> dma_free_coherent(&ha->pdev->dev,
> @@ -499,7 +505,8 @@ static void qla2x00_free_rsp_que(struct qla_hw_data *ha,
> struct rsp_que *rsp)
> (rsp->length + 1) * sizeof(response_t),
> rsp->ring, rsp->dma);
> }
> - kfree(rsp);
> + if (rsp)
> + kfree(rsp);
> }
>
> static void qla2x00_free_queues(struct qla_hw_data *ha)
> @@ -1723,6 +1730,8 @@ uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha)
> struct qla_tgt_cmd *cmd;
> uint8_t trace = 0;
>
> + if (!ha->req_q_map)
> + return;
> spin_lock_irqsave(qp->qp_lock_ptr, flags);
> req = qp->req;
> for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
> @@ -3095,14 +3104,14 @@ static void qla2x00_iocb_work_fn(struct work_struct
> *work)
> /* Set up the irqs */
> ret = qla2x00_request_irqs(ha, rsp);
> if (ret)
> - goto probe_hw_failed;
> + goto probe_failed;
>
> /* Alloc arrays of request and response ring ptrs */
> - if (!qla2x00_alloc_queues(ha, req, rsp)) {
> + if (qla2x00_alloc_queues(ha, req, rsp)) {
> ql_log(ql_log_fatal, base_vha, 0x003d,
> "Failed to allocate memory for queue pointers..."
> "aborting.\n");
> - goto probe_init_failed;
> + goto probe_failed;
> }
>
> if (ha->mqenable && shost_use_blk_mq(host)) {
> @@ -3387,15 +3396,6 @@ static void qla2x00_iocb_work_fn(struct work_struct
> *work)
>
> return 0;
>
> -probe_init_failed:
> - qla2x00_free_req_que(ha, req);
> - ha->req_q_map[0] = NULL;
> - clear_bit(0, ha->req_qid_map);
> - qla2x00_free_rsp_que(ha, rsp);
> - ha->rsp_q_map[0] = NULL;
> - clear_bit(0, ha->rsp_qid_map);
> - ha->max_req_queues = ha->max_rsp_queues = 0;
> -
> probe_failed:
> if (base_vha->timer_active)
> qla2x00_stop_timer(base_vha);
> @@ -4508,11 +4508,17 @@ void qla2x00_mark_device_lost(scsi_qla_host_t *vha,
> fc_port_t *fcport,
> if (ha->init_cb)
> dma_free_coherent(&ha->pdev->dev, ha->init_cb_size,
> ha->init_cb, ha->init_cb_dma);
> - vfree(ha->optrom_buffer);
> - kfree(ha->nvram);
> - kfree(ha->npiv_info);
> - kfree(ha->swl);
> - kfree(ha->loop_id_map);
> +
> + if (ha->optrom_buffer)
> + vfree(ha->optrom_buffer);
> + if (ha->nvram)
> + kfree(ha->nvram);
> + if (ha->npiv_info)
> + kfree(ha->npiv_info);
> + if (ha->swl)
> + kfree(ha->swl);
> + if (ha->loop_id_map)
> + kfree(ha->loop_id_map);
>
> ha->srb_mempool = NULL;
> ha->ctx_mempool = NULL;
> @@ -4528,6 +4534,15 @@ void qla2x00_mark_device_lost(scsi_qla_host_t *vha,
> fc_port_t *fcport,
> ha->ex_init_cb_dma = 0;
> ha->async_pd = NULL;
> ha->async_pd_dma = 0;
> + ha->loop_id_map = NULL;
> + ha->npiv_info = NULL;
> + ha->optrom_buffer = NULL;
> + ha->swl = NULL;
> + ha->nvram = NULL;
> + ha->mctp_dump = NULL;
> + ha->dcbx_tlv = NULL;
> + ha->xgmac_data = NULL;
> + ha->sfp_data = NULL;
>
> ha->s_dma_pool = NULL;
> ha->dl_dma_pool = NULL;
>
This cries out for a memset()...
Otherwise:
Reviewed-by: Hannes Reinecke <[email protected]>
Cheers,
Hannes
--
Dr. Hannes Reinecke Teamlead Storage & Networking
[email protected] +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)