----- On Jul 7, 2015, at 3:29 AM, Peter Zijlstra [email protected] wrote:

> On Tue, Jul 07, 2015 at 02:59:06PM +0930, Arthur Marsh wrote:
>> I had a single, non-reproducible case of the same lock-up happening on my
>> other machine running the Linus git head kernel in 64-bit mode.
> 
> Hmm, disturbing.. I've had my machines run this stuff for weeks and not
> had anything like this :/
> 
> Do you have a serial cable between those machines? serial console output
> will allow capturing more complete traces than these pictures can and
> might also aid in capturing some extra debug info.
> 
> In any case, I'll go try and build some debug code.

Arthur: can you double-check whether you load any module with --force?
This could cause a module header layout mismatch, which can be an
issue with the changes done by the identified commit: the module
header layout changes there.

Also, I'm attaching a small patch which serializes both updates and
reads of the module rbtree. Can you try it out? If the problem
still shows with the spinlocks in place, that would mean the issue
is *not* a race between latched rbtree updates and traversals.

Thanks!

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
From 0d046f205fa49c477bbf81b72cd038fb9f7e40d6 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <[email protected]>
Date: Tue, 7 Jul 2015 12:24:37 -0400
Subject: [PATCH] TESTING: add spinlock to module.c rb latch tree

Not-Signed-off-by: Mathieu Desnoyers <[email protected]>
---
 kernel/module.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/kernel/module.c b/kernel/module.c
index 3e0e197..eb99751 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -181,6 +181,9 @@ static struct mod_tree_root {
 #define module_addr_min mod_tree.addr_min
 #define module_addr_max mod_tree.addr_max
 
+/* Fully serialize readers and updates to rb latch tree. */
+static DEFINE_SPINLOCK(test_rb_latch_lock);
+
 static noinline void __mod_tree_insert(struct mod_tree_node *node)
 {
 	latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
@@ -197,31 +200,50 @@ static void __mod_tree_remove(struct mod_tree_node *node)
  */
 static void mod_tree_insert(struct module *mod)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&test_rb_latch_lock, flags);
 	mod->mtn_core.mod = mod;
 	mod->mtn_init.mod = mod;
 
 	__mod_tree_insert(&mod->mtn_core);
 	if (mod->init_size)
 		__mod_tree_insert(&mod->mtn_init);
+	spin_unlock_irqrestore(&test_rb_latch_lock, flags);
 }
 
 static void mod_tree_remove_init(struct module *mod)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&test_rb_latch_lock, flags);
 	if (mod->init_size)
 		__mod_tree_remove(&mod->mtn_init);
+	spin_unlock_irqrestore(&test_rb_latch_lock, flags);
 }
 
 static void mod_tree_remove(struct module *mod)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&test_rb_latch_lock, flags);
 	__mod_tree_remove(&mod->mtn_core);
+	spin_unlock_irqrestore(&test_rb_latch_lock, flags);
 	mod_tree_remove_init(mod);
 }
 
 static struct module *mod_find(unsigned long addr)
 {
 	struct latch_tree_node *ltn;
+	unsigned long flags;
 
+	if (in_nmi()) {
+		printk(KERN_ERR "mod_find called from NMI\n");
+		return NULL;
+	}
+	spin_lock_irqsave(&test_rb_latch_lock, flags);
 	ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops);
+	spin_unlock_irqrestore(&test_rb_latch_lock, flags);
 	if (!ltn)
 		return NULL;
 
-- 
1.9.1

Reply via email to