With lockdep enabled, we see a bug from the BMC dynamic-id support:

  BUG: sleeping function called from invalid context at 
../kernel/locking/mutex.c:747
  in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0
  no locks held by swapper/0/0.
  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.13.0-rc7-next-20170829 #2
  Call Trace:
  [c000003fff74f9b0] [c000000000a8d9f8] dump_stack+0xb0/0xf8 (unreliable)
  [c000003fff74f9f0] [c000000000102820] ___might_sleep+0x150/0x180
  [c000003fff74fa50] [c000000000aa98d0] __mutex_lock+0x80/0xa20
  [c000003fff74fb60] [c0000000005f0280] bmc_device_id_handler+0x50/0x1e0
  [c000003fff74fbe0] [c0000000005f0cfc] deliver_response+0xac/0x100
  [c000003fff74fc10] [c0000000005f24e4] handle_one_recv_msg+0x164/0x10e0
  [c000003fff74fd10] [c0000000005f369c] handle_new_recv_msgs+0x23c/0x2b0
  [c000003fff74fd70] [c0000000000cfdf8] tasklet_action+0xc8/0x1b0
  [c000003fff74fdd0] [c000000000ab00d8] __do_softirq+0x1d8/0x4cc
  [c000003fff74fef0] [c0000000000cf608] irq_exit+0xe8/0x120
  [c000003fff74ff10] [c000000000016568] __do_irq+0x108/0x2e0
  [c000003fff74ff90] [c000000000029c10] call_do_irq+0x14/0x24
  [c000000000ff7a70] [c0000000000167d4] do_IRQ+0x94/0x140
  [c000000000ff7ac0] [c000000000008b28] hardware_interrupt_common+0x128/0x130
  --- interrupt: 501 at replay_interrupt_return+0x0/0x4
      LR = arch_local_irq_restore+0x74/0x90
  [c000000000ff7db0] [0000000010004c20] 0x10004c20 (unreliable)
  [c000000000ff7dd0] [c00000000001e3e8] arch_cpu_idle+0xd8/0x130
  [c000000000ff7e00] [c000000000aaef34] default_idle_call+0x44/0x7c
  [c000000000ff7e20] [c00000000012a884] do_idle+0x294/0x2b0
  [c000000000ff7e90] [c00000000012aac0] cpu_startup_entry+0x30/0x50
  [c000000000ff7ec0] [c00000000000d0f8] rest_init+0x1b8/0x1e0
  [c000000000ff7f00] [c000000000dc0d28] start_kernel+0x53c/0x558
  [c000000000ff7f90] [c00000000000ac70] start_here_common+0x1c/0x4ac
  ipmi-powernv ibm,opal:ipmi: Found new BMC (man_id: 0x000000, prod_id: 0xaabb, 
dev_id: 0x20)

- the IPMI response from the device ID query is being invoked at IRQ
context, so using a mutex isn't suitable for dyn_id_lock.

This change makes dyn_id_lock a spinlock instead. The lock is only
protecting from concurrent accesses to the id & dyn_id_expiry members of
struct bmc_device, so we don't need to sleep while it's held.

Reported-by: Michael Ellerman <m...@ellerman.id.au>
Signed-off-by: Jeremy Kerr <j...@ozlabs.org>
---
 drivers/char/ipmi/ipmi_msghandler.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_msghandler.c 
b/drivers/char/ipmi/ipmi_msghandler.c
index 9c82d55..7edb482 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -277,7 +277,7 @@ struct bmc_device {
        ipmi_smi_t             intf;
        enum bmc_dyn_device_id_state dyn_id_state;
        unsigned long          dyn_id_expiry;
-       struct mutex           dyn_id_lock; /* protects id & dyn_id* fields */
+       spinlock_t             dyn_id_lock; /* protects id & dyn_id* fields */
        unsigned char          guid[16];
        int                    guid_set;
        char                   name[16];
@@ -2087,6 +2087,7 @@ EXPORT_SYMBOL(ipmi_request_supply_msgs);
 static void bmc_device_id_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg)
 {
        struct ipmi_device_id id;
+       unsigned long flags;
        int rc;
 
        if ((msg->addr.addr_type != IPMI_SYSTEM_INTERFACE_ADDR_TYPE)
@@ -2098,7 +2099,7 @@ static void bmc_device_id_handler(ipmi_smi_t intf, struct 
ipmi_recv_msg *msg)
         * the mutex over the entire transaction), as we may be called after
         * bmc_get_device_id has timed-out (and so released its mutex).
         */
-       mutex_lock(&intf->bmc->dyn_id_lock);
+       spin_lock_irqsave(&intf->bmc->dyn_id_lock, flags);
 
        rc = ipmi_demangle_device_id(msg->msg.netfn, msg->msg.cmd,
                        msg->msg.data, msg->msg.data_len, &id);
@@ -2117,7 +2118,7 @@ static void bmc_device_id_handler(ipmi_smi_t intf, struct 
ipmi_recv_msg *msg)
        intf->bmc->dyn_id_state = BMC_DEVICE_DYN_ID_STATE_VALID;
        intf->bmc->dyn_id_expiry = jiffies + IPMI_DYN_DEV_ID_EXPIRY;
 out:
-       mutex_unlock(&intf->bmc->dyn_id_lock);
+       spin_unlock_irqrestore(&intf->bmc->dyn_id_lock, flags);
        wake_up(&intf->waitq);
 }
 
@@ -2127,17 +2128,18 @@ static int bmc_get_device_id(struct bmc_device *bmc, 
struct ipmi_device_id *id)
        struct ipmi_system_interface_addr si;
        ipmi_smi_t intf = bmc->intf;
        struct kernel_ipmi_msg msg;
+       unsigned long flags;
        bool query = false;
        int tmp, rc = 0;
 
-       mutex_lock(&bmc->dyn_id_lock);
+       spin_lock_irqsave(&bmc->dyn_id_lock, flags);
 
        /* if we have a valid and current ID, just return that. */
        if (bmc->dyn_id_state == BMC_DEVICE_DYN_ID_STATE_VALID &&
                        time_is_after_jiffies(bmc->dyn_id_expiry)) {
                if (id)
                        *id = bmc->id;
-               mutex_unlock(&bmc->dyn_id_lock);
+               spin_unlock_irqrestore(&bmc->dyn_id_lock, flags);
                return 0;
        }
 
@@ -2153,7 +2155,7 @@ static int bmc_get_device_id(struct bmc_device *bmc, 
struct ipmi_device_id *id)
        /* We unlock here, as bmc_device_id_handler will need to re-acquire
         * the mutex
         */
-       mutex_unlock(&bmc->dyn_id_lock);
+       spin_unlock_irqrestore(&bmc->dyn_id_lock, flags);
 
        /* Send Get Device ID request. */
        if (query) {
@@ -2182,9 +2184,9 @@ static int bmc_get_device_id(struct bmc_device *bmc, 
struct ipmi_device_id *id)
                                      -1, 0);
 
                if (rc) {
-                       mutex_lock(&bmc->dyn_id_lock);
+                       spin_lock_irqsave(&bmc->dyn_id_lock, flags);
                        bmc->dyn_id_state = BMC_DEVICE_DYN_ID_STATE_INVALID;
-                       mutex_unlock(&bmc->dyn_id_lock);
+                       spin_unlock_irqrestore(&bmc->dyn_id_lock, flags);
                        return rc;
                }
        }
@@ -2200,7 +2202,7 @@ static int bmc_get_device_id(struct bmc_device *bmc, 
struct ipmi_device_id *id)
        if (query)
                intf->null_user_handler = NULL;
 
-       mutex_lock(&bmc->dyn_id_lock);
+       spin_lock_irqsave(&bmc->dyn_id_lock, flags);
        if (!tmp) {
                /* timeout */
                bmc->dyn_id_state = BMC_DEVICE_DYN_ID_STATE_INVALID;
@@ -2209,7 +2211,7 @@ static int bmc_get_device_id(struct bmc_device *bmc, 
struct ipmi_device_id *id)
                /* We didn't timeout, but another thread has started a new
                 * query after we'd dropped the lock. Wait for that thread
                 * instead. */
-               mutex_unlock(&bmc->dyn_id_lock);
+               spin_unlock_irqrestore(&bmc->dyn_id_lock, flags);
                goto retry;
 
        } else if (bmc->dyn_id_state != BMC_DEVICE_DYN_ID_STATE_VALID) {
@@ -2219,7 +2221,7 @@ static int bmc_get_device_id(struct bmc_device *bmc, 
struct ipmi_device_id *id)
                *id = bmc->id;
        }
 
-       mutex_unlock(&intf->bmc->dyn_id_lock);
+       spin_unlock_irqrestore(&intf->bmc->dyn_id_lock, flags);
 
        return rc;
 }
@@ -2730,7 +2732,7 @@ static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum)
        struct bmc_device *bmc = intf->bmc;
        struct bmc_device *old_bmc;
 
-       mutex_init(&bmc->dyn_id_lock);
+       spin_lock_init(&bmc->dyn_id_lock);
        bmc_get_device_id(bmc, NULL);
 
        mutex_lock(&ipmidriver_mutex);
-- 
2.7.4


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Openipmi-developer mailing list
Openipmi-developer@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/openipmi-developer

Reply via email to