From 144aa8c3020c72a68a57f5d1496ed9a83d013087 Mon Sep 17 00:00:00 2001
From: Tony Camuso <tcam...@redhat.com>
Date: Tue, 20 Aug 2019 14:51:26 -0400
Subject: [PATCH] ipmi: move message error checking to avoid deadlock
In the source stack trace below, function set_need_watch tries to
take out the same si_lock that was taken earlier by ipmi_thread.
ipmi_thread() [drivers/char/ipmi/ipmi_si_intf.c:995]
smi_event_handler() [drivers/char/ipmi/ipmi_si_intf.c:765]
handle_transaction_done() [drivers/char/ipmi/ipmi_si_intf.c:555]
deliver_recv_msg() [drivers/char/ipmi/ipmi_si_intf.c:283]
ipmi_smi_msg_received() [drivers/char/ipmi/ipmi_msghandler.c:4503]
intf_err_seq() [drivers/char/ipmi/ipmi_msghandler.c:1149]
smi_remove_watch() [drivers/char/ipmi/ipmi_msghandler.c:999]
set_need_watch() [drivers/char/ipmi/ipmi_si_intf.c:1066]
Upstream commit e1891cffd4c4896a899337a243273f0e23c028df adds code to
ipmi_smi_msg_received() to call smi_remove_watch() via intf_err_seq()
and this seems to be causing the deadlock.
commit e1891cffd4c4896a899337a243273f0e23c028df
Author: Corey Minyard <cminy...@mvista.com>
Date: Wed Oct 24 15:17:04 2018 -0500
ipmi: Make the smi watcher be disabled immediately when not needed
The fix is to put all messages in the queue and move the message
checking code out of ipmi_smi_msg_received and into handle_one_recv_msg,
which processes the message checking after ipmi_thread releases its
locks.
Reported by: Osamu Samukawa <osa-samuk...@tg.jp.nec.com>
Characterized by: Kosuke Tatsukawa <ta...@ab.jp.nec.com>
Signed-off-by: Tony Camuso <tcam...@redhat.com>
---
drivers/char/ipmi/ipmi_msghandler.c | 113 ++++++++++++++++++------------------
1 file changed, 56 insertions(+), 57 deletions(-)
diff --git a/drivers/char/ipmi/ipmi_msghandler.c
b/drivers/char/ipmi/ipmi_msghandler.c
index 6707659cffd6..0a907cd62dc6 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -4215,7 +4215,52 @@ static int handle_one_recv_msg(struct ipmi_smi *intf,
int chan;
ipmi_debug_msg("Recv:", msg->rsp, msg->rsp_size);
- if (msg->rsp_size < 2) {
+
+ if ((msg->data_size >= 2)
+ && (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2))
+ && (msg->data[1] == IPMI_SEND_MSG_CMD)
+ && (msg->user_data == NULL)) {
+
+ if (intf->in_shutdown)
+ goto free_msg;
+
+ /*
+ * This is the local response to a command send, start
+ * the timer for these. The user_data will not be
+ * NULL if this is a response send, and we will let
+ * response sends just go through.
+ */
+
+ /*
+ * Check for errors, if we get certain errors (ones
+ * that mean basically we can try again later), we
+ * ignore them and start the timer. Otherwise we
+ * report the error immediately.
+ */
+ if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0)
+ && (msg->rsp[2] != IPMI_NODE_BUSY_ERR)
+ && (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR)
+ && (msg->rsp[2] != IPMI_BUS_ERR)
+ && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) {
+ int ch = msg->rsp[3] & 0xf;
+ struct ipmi_channel *chans;
+
+ /* Got an error sending the message, handle it. */
+
+ chans = READ_ONCE(intf->channel_list)->c;
+ if ((chans[ch].medium == IPMI_CHANNEL_MEDIUM_8023LAN)
+ || (chans[ch].medium == IPMI_CHANNEL_MEDIUM_ASYNC))
+ ipmi_inc_stat(intf, sent_lan_command_errs);
+ else
+ ipmi_inc_stat(intf, sent_ipmb_command_errs);
+ intf_err_seq(intf, msg->msgid, msg->rsp[2]);
+ } else
+ /* The message was sent, start the timer. */
+ intf_start_seq_timer(intf, msg->msgid);
+free_msg:
+ ipmi_free_smi_msg(msg);
+
+ } else if (msg->rsp_size < 2) {
/* Message is too small to be correct. */
dev_warn(intf->si_dev,
"BMC returned too small a message for netfn %x cmd %x, got
%d bytes\n",
@@ -4472,62 +4517,16 @@ void ipmi_smi_msg_received(struct ipmi_smi *intf,
unsigned long flags = 0; /* keep us warning-free. */
int run_to_completion = intf->run_to_completion;
- if ((msg->data_size >= 2)
- && (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2))
- && (msg->data[1] == IPMI_SEND_MSG_CMD)
- && (msg->user_data == NULL)) {
-
- if (intf->in_shutdown)
- goto free_msg;
-
- /*
- * This is the local response to a command send, start
- * the timer for these. The user_data will not be
- * NULL if this is a response send, and we will let
- * response sends just go through.
- */
-
- /*
- * Check for errors, if we get certain errors (ones
- * that mean basically we can try again later), we
- * ignore them and start the timer. Otherwise we
- * report the error immediately.
- */
- if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0)
- && (msg->rsp[2] != IPMI_NODE_BUSY_ERR)
- && (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR)
- && (msg->rsp[2] != IPMI_BUS_ERR)
- && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) {
- int ch = msg->rsp[3] & 0xf;
- struct ipmi_channel *chans;
-
- /* Got an error sending the message, handle it. */
-
- chans = READ_ONCE(intf->channel_list)->c;
- if ((chans[ch].medium == IPMI_CHANNEL_MEDIUM_8023LAN)
- || (chans[ch].medium == IPMI_CHANNEL_MEDIUM_ASYNC))
- ipmi_inc_stat(intf, sent_lan_command_errs);
- else
- ipmi_inc_stat(intf, sent_ipmb_command_errs);
- intf_err_seq(intf, msg->msgid, msg->rsp[2]);
- } else
- /* The message was sent, start the timer. */
- intf_start_seq_timer(intf, msg->msgid);
-
-free_msg:
- ipmi_free_smi_msg(msg);
- } else {
- /*
- * To preserve message order, we keep a queue and deliver from
- * a tasklet.
- */
- if (!run_to_completion)
- spin_lock_irqsave(&intf->waiting_rcv_msgs_lock, flags);
- list_add_tail(&msg->link, &intf->waiting_rcv_msgs);
- if (!run_to_completion)
- spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock,
- flags);
- }
+ /*
+ * To preserve message order, we keep a queue and deliver from
+ * a tasklet.
+ */
+ if (!run_to_completion)
+ spin_lock_irqsave(&intf->waiting_rcv_msgs_lock, flags);
+ list_add_tail(&msg->link, &intf->waiting_rcv_msgs);
+ if (!run_to_completion)
+ spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock,
+ flags);
if (!run_to_completion)
spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
--
2.18.1
_______________________________________________
Openipmi-developer mailing list
Openipmi-developer@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/openipmi-developer