On 11/04/2016 07:47 AM, James Bottomley wrote:
You know after

if (device_remove_file_self(dev, attr))

returns true that s_active is held and also that KERNFS_SUICIDAL is set
on the node, so the non-self remove paths can simply check for this
flag and return without descending into __kernfs_remove(), which would
mean they never take s_active.  That means we never get the inversion.

Hello James,

The lock inversion is not triggered by the non-self remove paths but by the self-remove path. Anyway, can you have a look at the two attached patches?

Thanks,

Bart.

>From dfd0bfa8b15e8d56a4f1ca3dc781439d10d0b098 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanass...@sandisk.com>
Date: Fri, 4 Nov 2016 10:18:52 -0600
Subject: [PATCH 1/2] kernfs: Introduce a local variable

This patch does not change any functionality.

Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
---
 fs/kernfs/file.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 78219d5..da58ea4 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -186,6 +186,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
 				       loff_t *ppos)
 {
 	ssize_t len = min_t(size_t, count, PAGE_SIZE);
+	struct kernfs_node *kn = of->kn;
 	const struct kernfs_ops *ops;
 	char *buf;
 
@@ -202,20 +203,20 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
 	 * the ops aren't called concurrently for the same open file.
 	 */
 	mutex_lock(&of->mutex);
-	if (!kernfs_get_active(of->kn)) {
+	if (!kernfs_get_active(kn)) {
 		len = -ENODEV;
 		mutex_unlock(&of->mutex);
 		goto out_free;
 	}
 
-	of->event = atomic_read(&of->kn->attr.open->event);
-	ops = kernfs_ops(of->kn);
+	of->event = atomic_read(&kn->attr.open->event);
+	ops = kernfs_ops(kn);
 	if (ops->read)
 		len = ops->read(of, buf, len, *ppos);
 	else
 		len = -EINVAL;
 
-	kernfs_put_active(of->kn);
+	kernfs_put_active(kn);
 	mutex_unlock(&of->mutex);
 
 	if (len < 0)
@@ -274,6 +275,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
 				size_t count, loff_t *ppos)
 {
 	struct kernfs_open_file *of = kernfs_of(file);
+	struct kernfs_node *kn = of->kn;
 	const struct kernfs_ops *ops;
 	size_t len;
 	char *buf;
@@ -305,19 +307,19 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
 	 * the ops aren't called concurrently for the same open file.
 	 */
 	mutex_lock(&of->mutex);
-	if (!kernfs_get_active(of->kn)) {
+	if (!kernfs_get_active(kn)) {
 		mutex_unlock(&of->mutex);
 		len = -ENODEV;
 		goto out_free;
 	}
 
-	ops = kernfs_ops(of->kn);
+	ops = kernfs_ops(kn);
 	if (ops->write)
 		len = ops->write(of, buf, len, *ppos);
 	else
 		len = -EINVAL;
 
-	kernfs_put_active(of->kn);
+	kernfs_put_active(kn);
 	mutex_unlock(&of->mutex);
 
 	if (len > 0)
-- 
2.10.1

>From f092fc1d40f3746c417e979985346ba169af7a6d Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanass...@sandisk.com>
Date: Fri, 4 Nov 2016 10:07:23 -0600
Subject: [PATCH 2/2] Avoid that SCSI device removal through sysfs triggers a
 deadlock

The SCSI core holds scan_mutex around SCSI device addition and
removal operations. sysfs serializes attribute read and write
operations against attribute removal through s_active. Avoid that
grabbing scan_mutex during self-removal of a SCSI device triggers
a deadlock by re-grabbing s_active after self-removal has finished
instead of after kernfs_remove_self() has finished.

Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
---
 fs/kernfs/dir.c        | 19 +++++++++++++------
 fs/kernfs/file.c       |  7 +++++++
 include/linux/kernfs.h |  1 +
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index cf4c636..abd3481 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -426,6 +426,8 @@ void kernfs_put_active(struct kernfs_node *kn)
 	if (unlikely(!kn))
 		return;
 
+	WARN_ON_ONCE(kn->flags & KERNFS_BROKE_A_P);
+
 	if (kernfs_lockdep(kn))
 		rwsem_release(&kn->dep_map, 1, _RET_IP_);
 	v = atomic_dec_return(&kn->active);
@@ -1314,6 +1316,9 @@ void kernfs_unbreak_active_protection(struct kernfs_node *kn)
  * kernfs_remove_self - remove a kernfs_node from its own method
  * @kn: the self kernfs_node to remove
  *
+ * If kernfs_remove_self() sets KERNFS_BROKE_A_P the caller must invoke
+ * kernfs_unbreak_active_protection().
+ *
  * The caller must be running off of a kernfs operation which is invoked
  * with an active reference - e.g. one of kernfs_ops.  This can be used to
  * implement a file operation which deletes itself.
@@ -1354,6 +1359,7 @@ bool kernfs_remove_self(struct kernfs_node *kn)
 	 */
 	if (!(kn->flags & KERNFS_SUICIDAL)) {
 		kn->flags |= KERNFS_SUICIDAL;
+		kn->flags |= KERNFS_BROKE_A_P;
 		__kernfs_remove(kn);
 		kn->flags |= KERNFS_SUICIDED;
 		ret = true;
@@ -1375,13 +1381,14 @@ bool kernfs_remove_self(struct kernfs_node *kn)
 		finish_wait(waitq, &wait);
 		WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
 		ret = false;
-	}
 
-	/*
-	 * This must be done while holding kernfs_mutex; otherwise, waiting
-	 * for SUICIDED && deactivated could finish prematurely.
-	 */
-	kernfs_unbreak_active_protection(kn);
+		/*
+		 * This must be done while holding kernfs_mutex; otherwise,
+		 * waiting for SUICIDED && deactivated could finish
+		 * prematurely.
+		 */
+		kernfs_unbreak_active_protection(kn);
+	}
 
 	mutex_unlock(&kernfs_mutex);
 	return ret;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index da58ea4..be2e540 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -319,6 +319,13 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
 	else
 		len = -EINVAL;
 
+	mutex_lock(&kernfs_mutex);
+	if (kn->flags & KERNFS_BROKE_A_P) {
+		kernfs_unbreak_active_protection(kn);
+		kn->flags &= ~KERNFS_BROKE_A_P;
+	}
+	mutex_unlock(&kernfs_mutex);
+
 	kernfs_put_active(kn);
 	mutex_unlock(&of->mutex);
 
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 7056238..34de7f1 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -46,6 +46,7 @@ enum kernfs_node_flag {
 	KERNFS_SUICIDAL		= 0x0400,
 	KERNFS_SUICIDED		= 0x0800,
 	KERNFS_EMPTY_DIR	= 0x1000,
+	KERNFS_BROKE_A_P	= 0x2000,
 };
 
 /* @flags for kernfs_create_root() */
-- 
2.10.1

Reply via email to