On Tue, Aug 13, 2019 at 08:51:55AM -0400, Tony Camuso wrote:
> This fixes the problem for the scheduled case, but I don't think it will
> fix it for the run_to_completion case.

Locks are turned off in run_to_completion.  So there shouldn't be
an issue there.

-corey

> 
> I'll have to think about that a bit more, unless you have some insight.
> 
> From d69ada45a95d9c5bcc95329e5bc16a56cd46cc53 Mon Sep 17 00:00:00 2001
> From: Tony Camuso <tcam...@redhat.com>
> Date: Tue, 13 Aug 2019 08:27:57 -0400
> Subject: [PATCH] ipmi: move message checking into the tasklet to avoid
>  deadlocl
> 
> In the source stack trace below, function set_need_watch tries to
> take out the same si_lock that was taken earlier by ipmi_thread.
> 
> ipmi_thread() [drivers/char/ipmi/ipmi_si_intf.c:995]
>  smi_event_handler() [drivers/char/ipmi/ipmi_si_intf.c:765]
>   handle_transaction_done() [drivers/char/ipmi/ipmi_si_intf.c:555]
>    deliver_recv_msg() [drivers/char/ipmi/ipmi_si_intf.c:283]
>     ipmi_smi_msg_received() [drivers/char/ipmi/ipmi_msghandler.c:4503]
>      intf_err_seq() [drivers/char/ipmi/ipmi_msghandler.c:1149]
>       smi_remove_watch() [drivers/char/ipmi/ipmi_msghandler.c:999]
>        set_need_watch() [drivers/char/ipmi/ipmi_si_intf.c:1066]
> 
> Upstream commit e1891cffd4c4896a899337a243273f0e23c028df adds code to
> ipmi_smi_msg_received() to call smi_remove_watch() via intf_err_seq()
> and this seems to be causing the deadlock.
> 
> commit e1891cffd4c4896a899337a243273f0e23c028df
> Author: Corey Minyard <cminy...@mvista.com>
> Date:   Wed Oct 24 15:17:04 2018 -0500
>     ipmi: Make the smi watcher be disabled immediately when not needed
> 
> The fix is to move the message checking code out of ipmi_smi_msg_received
> and into smi_recv_tasklet, which processes the message checking after
> ipmi_smi_msg_received releases its locks.
> 
> Reported by: Osamu Samukawa <osa-samuk...@tg.jp.nec.com>
> Characterized by: Kosuke Tatsukawa <ta...@ab.jp.nec.com>
> Signed-off-by: Tony Camuso <tcam...@redhat.com>
> ---
>  drivers/char/ipmi/ipmi_msghandler.c | 98 
> ++++++++++++++++++-------------------
>  1 file changed, 49 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/char/ipmi/ipmi_msghandler.c 
> b/drivers/char/ipmi/ipmi_msghandler.c
> index 6707659cffd6e..13402483f2623 100644
> --- a/drivers/char/ipmi/ipmi_msghandler.c
> +++ b/drivers/char/ipmi/ipmi_msghandler.c
> @@ -4426,6 +4426,54 @@ static void smi_recv_tasklet(unsigned long val)
>       struct ipmi_smi *intf = (struct ipmi_smi *) val;
>       int run_to_completion = intf->run_to_completion;
>       struct ipmi_smi_msg *newmsg = NULL;
> +     struct ipmi_smi_msg *msg = intf->curr_msg;
> +
> +     if (msg != NULL) {
> +
> +             /*
> +              * This is the local response to a command send, start
> +              * the timer for these.  The user_data will not be
> +              * NULL if this is a response send, and we will let
> +              * response sends just go through.
> +              */
> +
> +             /*
> +              * Check for errors, if we get certain errors (ones
> +              * that mean basically we can try again later), we
> +              * ignore them and start the timer.  Otherwise we
> +              * report the error immediately.
> +              */
> +             if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0)
> +                 && (msg->rsp[2] != IPMI_NODE_BUSY_ERR)
> +                 && (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR)
> +                 && (msg->rsp[2] != IPMI_BUS_ERR)
> +                 && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) {
> +                     int ch = msg->rsp[3] & 0xf;
> +                     struct ipmi_channel *chans;
> +
> +                     /* Got an error sending the message, handle it. */
> +                     chans = READ_ONCE(intf->channel_list)->c;
> +                     if ((chans[ch].medium == IPMI_CHANNEL_MEDIUM_8023LAN)
> +                         || (chans[ch].medium == IPMI_CHANNEL_MEDIUM_ASYNC))
> +                             ipmi_inc_stat(intf, sent_lan_command_errs);
> +                     else
> +                             ipmi_inc_stat(intf, sent_ipmb_command_errs);
> +                     intf_err_seq(intf, msg->msgid, msg->rsp[2]);
> +             } else
> +                     /* The message was sent, start the timer. */
> +                     intf_start_seq_timer(intf, msg->msgid);
> +
> +             if (!run_to_completion)
> +                     spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
> +             /*
> +              * We can get an asynchronous event or receive message in 
> addition
> +              * to commands we send.
> +              */
> +             if (msg == intf->curr_msg)
> +                     intf->curr_msg = NULL;
> +             if (!run_to_completion)
> +                     spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
> +     }
>  
>       /*
>        * Start the next message if available.
> @@ -4478,44 +4526,7 @@ void ipmi_smi_msg_received(struct ipmi_smi *intf,
>           && (msg->user_data == NULL)) {
>  
>               if (intf->in_shutdown)
> -                     goto free_msg;
> -
> -             /*
> -              * This is the local response to a command send, start
> -              * the timer for these.  The user_data will not be
> -              * NULL if this is a response send, and we will let
> -              * response sends just go through.
> -              */
> -
> -             /*
> -              * Check for errors, if we get certain errors (ones
> -              * that mean basically we can try again later), we
> -              * ignore them and start the timer.  Otherwise we
> -              * report the error immediately.
> -              */
> -             if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0)
> -                 && (msg->rsp[2] != IPMI_NODE_BUSY_ERR)
> -                 && (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR)
> -                 && (msg->rsp[2] != IPMI_BUS_ERR)
> -                 && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) {
> -                     int ch = msg->rsp[3] & 0xf;
> -                     struct ipmi_channel *chans;
> -
> -                     /* Got an error sending the message, handle it. */
> -
> -                     chans = READ_ONCE(intf->channel_list)->c;
> -                     if ((chans[ch].medium == IPMI_CHANNEL_MEDIUM_8023LAN)
> -                         || (chans[ch].medium == IPMI_CHANNEL_MEDIUM_ASYNC))
> -                             ipmi_inc_stat(intf, sent_lan_command_errs);
> -                     else
> -                             ipmi_inc_stat(intf, sent_ipmb_command_errs);
> -                     intf_err_seq(intf, msg->msgid, msg->rsp[2]);
> -             } else
> -                     /* The message was sent, start the timer. */
> -                     intf_start_seq_timer(intf, msg->msgid);
> -
> -free_msg:
> -             ipmi_free_smi_msg(msg);
> +                     ipmi_free_smi_msg(msg);
>       } else {
>               /*
>                * To preserve message order, we keep a queue and deliver from
> @@ -4529,17 +4540,6 @@ void ipmi_smi_msg_received(struct ipmi_smi *intf,
>                                              flags);
>       }
>  
> -     if (!run_to_completion)
> -             spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
> -     /*
> -      * We can get an asynchronous event or receive message in addition
> -      * to commands we send.
> -      */
> -     if (msg == intf->curr_msg)
> -             intf->curr_msg = NULL;
> -     if (!run_to_completion)
> -             spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
> -
>       if (run_to_completion)
>               smi_recv_tasklet((unsigned long) intf);
>       else
> -- 
> 2.21.0
> 
> 
> 
> _______________________________________________
> Openipmi-developer mailing list
> Openipmi-developer@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/openipmi-developer


_______________________________________________
Openipmi-developer mailing list
Openipmi-developer@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/openipmi-developer

Reply via email to