This fixes the problem for the scheduled case, but I don't think it will
fix it for the run_to_completion case.

I'll have to think about that a bit more, unless you have some insight.

>From d69ada45a95d9c5bcc95329e5bc16a56cd46cc53 Mon Sep 17 00:00:00 2001
From: Tony Camuso <tcam...@redhat.com>
Date: Tue, 13 Aug 2019 08:27:57 -0400
Subject: [PATCH] ipmi: move message checking into the tasklet to avoid
 deadlocl

In the source stack trace below, function set_need_watch tries to
take out the same si_lock that was taken earlier by ipmi_thread.

ipmi_thread() [drivers/char/ipmi/ipmi_si_intf.c:995]
 smi_event_handler() [drivers/char/ipmi/ipmi_si_intf.c:765]
  handle_transaction_done() [drivers/char/ipmi/ipmi_si_intf.c:555]
   deliver_recv_msg() [drivers/char/ipmi/ipmi_si_intf.c:283]
    ipmi_smi_msg_received() [drivers/char/ipmi/ipmi_msghandler.c:4503]
     intf_err_seq() [drivers/char/ipmi/ipmi_msghandler.c:1149]
      smi_remove_watch() [drivers/char/ipmi/ipmi_msghandler.c:999]
       set_need_watch() [drivers/char/ipmi/ipmi_si_intf.c:1066]

Upstream commit e1891cffd4c4896a899337a243273f0e23c028df adds code to
ipmi_smi_msg_received() to call smi_remove_watch() via intf_err_seq()
and this seems to be causing the deadlock.

commit e1891cffd4c4896a899337a243273f0e23c028df
Author: Corey Minyard <cminy...@mvista.com>
Date:   Wed Oct 24 15:17:04 2018 -0500
    ipmi: Make the smi watcher be disabled immediately when not needed

The fix is to move the message checking code out of ipmi_smi_msg_received
and into smi_recv_tasklet, which processes the message checking after
ipmi_smi_msg_received releases its locks.

Reported by: Osamu Samukawa <osa-samuk...@tg.jp.nec.com>
Characterized by: Kosuke Tatsukawa <ta...@ab.jp.nec.com>
Signed-off-by: Tony Camuso <tcam...@redhat.com>
---
 drivers/char/ipmi/ipmi_msghandler.c | 98 ++++++++++++++++++-------------------
 1 file changed, 49 insertions(+), 49 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_msghandler.c 
b/drivers/char/ipmi/ipmi_msghandler.c
index 6707659cffd6e..13402483f2623 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -4426,6 +4426,54 @@ static void smi_recv_tasklet(unsigned long val)
        struct ipmi_smi *intf = (struct ipmi_smi *) val;
        int run_to_completion = intf->run_to_completion;
        struct ipmi_smi_msg *newmsg = NULL;
+       struct ipmi_smi_msg *msg = intf->curr_msg;
+
+       if (msg != NULL) {
+
+               /*
+                * This is the local response to a command send, start
+                * the timer for these.  The user_data will not be
+                * NULL if this is a response send, and we will let
+                * response sends just go through.
+                */
+
+               /*
+                * Check for errors, if we get certain errors (ones
+                * that mean basically we can try again later), we
+                * ignore them and start the timer.  Otherwise we
+                * report the error immediately.
+                */
+               if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0)
+                   && (msg->rsp[2] != IPMI_NODE_BUSY_ERR)
+                   && (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR)
+                   && (msg->rsp[2] != IPMI_BUS_ERR)
+                   && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) {
+                       int ch = msg->rsp[3] & 0xf;
+                       struct ipmi_channel *chans;
+
+                       /* Got an error sending the message, handle it. */
+                       chans = READ_ONCE(intf->channel_list)->c;
+                       if ((chans[ch].medium == IPMI_CHANNEL_MEDIUM_8023LAN)
+                           || (chans[ch].medium == IPMI_CHANNEL_MEDIUM_ASYNC))
+                               ipmi_inc_stat(intf, sent_lan_command_errs);
+                       else
+                               ipmi_inc_stat(intf, sent_ipmb_command_errs);
+                       intf_err_seq(intf, msg->msgid, msg->rsp[2]);
+               } else
+                       /* The message was sent, start the timer. */
+                       intf_start_seq_timer(intf, msg->msgid);
+
+               if (!run_to_completion)
+                       spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
+               /*
+                * We can get an asynchronous event or receive message in 
addition
+                * to commands we send.
+                */
+               if (msg == intf->curr_msg)
+                       intf->curr_msg = NULL;
+               if (!run_to_completion)
+                       spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
+       }
 
        /*
         * Start the next message if available.
@@ -4478,44 +4526,7 @@ void ipmi_smi_msg_received(struct ipmi_smi *intf,
            && (msg->user_data == NULL)) {
 
                if (intf->in_shutdown)
-                       goto free_msg;
-
-               /*
-                * This is the local response to a command send, start
-                * the timer for these.  The user_data will not be
-                * NULL if this is a response send, and we will let
-                * response sends just go through.
-                */
-
-               /*
-                * Check for errors, if we get certain errors (ones
-                * that mean basically we can try again later), we
-                * ignore them and start the timer.  Otherwise we
-                * report the error immediately.
-                */
-               if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0)
-                   && (msg->rsp[2] != IPMI_NODE_BUSY_ERR)
-                   && (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR)
-                   && (msg->rsp[2] != IPMI_BUS_ERR)
-                   && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) {
-                       int ch = msg->rsp[3] & 0xf;
-                       struct ipmi_channel *chans;
-
-                       /* Got an error sending the message, handle it. */
-
-                       chans = READ_ONCE(intf->channel_list)->c;
-                       if ((chans[ch].medium == IPMI_CHANNEL_MEDIUM_8023LAN)
-                           || (chans[ch].medium == IPMI_CHANNEL_MEDIUM_ASYNC))
-                               ipmi_inc_stat(intf, sent_lan_command_errs);
-                       else
-                               ipmi_inc_stat(intf, sent_ipmb_command_errs);
-                       intf_err_seq(intf, msg->msgid, msg->rsp[2]);
-               } else
-                       /* The message was sent, start the timer. */
-                       intf_start_seq_timer(intf, msg->msgid);
-
-free_msg:
-               ipmi_free_smi_msg(msg);
+                       ipmi_free_smi_msg(msg);
        } else {
                /*
                 * To preserve message order, we keep a queue and deliver from
@@ -4529,17 +4540,6 @@ void ipmi_smi_msg_received(struct ipmi_smi *intf,
                                               flags);
        }
 
-       if (!run_to_completion)
-               spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
-       /*
-        * We can get an asynchronous event or receive message in addition
-        * to commands we send.
-        */
-       if (msg == intf->curr_msg)
-               intf->curr_msg = NULL;
-       if (!run_to_completion)
-               spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
-
        if (run_to_completion)
                smi_recv_tasklet((unsigned long) intf);
        else
-- 
2.21.0



_______________________________________________
Openipmi-developer mailing list
Openipmi-developer@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/openipmi-developer

Reply via email to