Currently ftrace_open_generic_file gets an event_file from
inode->i_private, and then locks event_mutex and gets refcount.
However, this can cause a race as below scenario;

CPU0                              CPU1
open(kprobe_events)
  trace_remove_event_call()    open(enable)
    lock event_mutex             get event_file from inode->i_private
      event_remove()             wait for unlock event_mutex
        ...
        free event_file
    unlock event_mutex
                                 lock event_mutex
                                 add refcount of event_file->call (*)

So, at (*) point, the event_file is already freed and we
may access the corrupted object.
The same thing could happen on event_call because it is also
directly accessed from i_private on some points.

To avoid this, when opening events/*/*/enable, we have to ensure
the dentry of the file is not unlinked yet, under event_mutex
is locked.

CPU0                              CPU1
open(kprobe_events)
  trace_remove_event_call()    open(enable)
    lock event_mutex             get event_file from inode->i_private
      event_remove()             wait for unlock event_mutex
        ...
        dput(dentry)
        free event_file
    unlock event_mutex
                                 lock event_mutex
                                 check !d_unhashed(dentry) and failed
                                 unlock event_mutex
                                 return -ENODEV

Note: trace_array_get(tr) always ensures existance of tr in
trace_arrays, so above check is not needed in the case that
i_private directly points the tr.

Changes from V1:
 - Use d_unhashed(f_dentry) to ensure the event exists according
   to Oleg's comment.

Signed-off-by: Masami Hiramatsu <[email protected]>
Suggested-by: Oleg Nesterov <[email protected]>
---
 kernel/trace/trace_events.c |   70 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 55 insertions(+), 15 deletions(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1a5547e..06fdac0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -391,15 +391,32 @@ static void __get_system_dir(struct ftrace_subsystem_dir 
*dir)
        __get_system(dir->subsystem);
 }
 
-static int ftrace_event_call_get(struct ftrace_event_call *call)
+static int __ftrace_event_call_get(struct ftrace_event_call *call)
 {
        int ret = 0;
 
-       mutex_lock(&event_mutex);
        if ((call->flags & TRACE_EVENT_FL_REF_MASK) == TRACE_EVENT_FL_REF_MAX - 
1)
                ret = -EBUSY;
        else
                call->flags++;
+
+       return ret;
+}
+
+static int ftrace_event_call_get(struct ftrace_event_call *call,
+                                struct dentry *dentry)
+{
+       int ret = -ENODEV;
+
+       mutex_lock(&event_mutex);
+       spin_lock(&dentry->d_lock);
+       if (d_unhashed(dentry)) /* This file is already unlinked */
+               goto out_unlock;
+
+       ret = __ftrace_event_call_get(call);
+
+ out_unlock:
+       spin_unlock(&dentry->d_lock);
        mutex_unlock(&event_mutex);
 
        return ret;
@@ -413,6 +430,35 @@ static void ftrace_event_call_put(struct ftrace_event_call 
*call)
        mutex_unlock(&event_mutex);
 }
 
+static int ftrace_event_file_get(struct ftrace_event_file *file,
+                                struct dentry *dentry)
+{
+       int ret = -ENODEV;
+
+       mutex_lock(&event_mutex);
+       spin_lock(&dentry->d_lock);
+       if (d_unhashed(dentry)) /* This file is already unlinked */
+               goto out_unlock;
+
+       ret = __ftrace_event_call_get(file->event_call);
+       if (!ret)
+               file->tr->ref++;
+
+ out_unlock:
+       spin_unlock(&dentry->d_lock);
+       mutex_unlock(&event_mutex);
+
+       return ret;
+}
+
+static void ftrace_event_file_put(struct ftrace_event_file *file)
+{
+       struct trace_array *tr = file->tr;
+
+       ftrace_event_call_put(file->event_call);
+       trace_array_put(tr);
+}
+
 static void __put_system_dir(struct ftrace_subsystem_dir *dir)
 {
        WARN_ON_ONCE(dir->ref_count == 0);
@@ -438,33 +484,27 @@ static void put_system(struct ftrace_subsystem_dir *dir)
 static int tracing_open_generic_file(struct inode *inode, struct file *filp)
 {
        struct ftrace_event_file *file = inode->i_private;
-       struct trace_array *tr = file->tr;
        int ret;
 
-       if (trace_array_get(tr) < 0)
-               return -ENODEV;
-
-       ret = tracing_open_generic(inode, filp);
+       ret = ftrace_event_file_get(file, filp->f_dentry);
        if (ret < 0)
-               goto fail;
+               return ret;
 
-       ret = ftrace_event_call_get(file->event_call);
+       ret = tracing_open_generic(inode, filp);
        if (ret < 0)
                goto fail;
 
        return 0;
  fail:
-       trace_array_put(tr);
+       ftrace_event_file_put(file);
        return ret;
 }
 
 static int tracing_release_generic_file(struct inode *inode, struct file *filp)
 {
        struct ftrace_event_file *file = inode->i_private;
-       struct trace_array *tr = file->tr;
 
-       ftrace_event_call_put(file->event_call);
-       trace_array_put(tr);
+       ftrace_event_file_put(file);
 
        return 0;
 }
@@ -477,7 +517,7 @@ static int tracing_open_generic_call(struct inode *inode, 
struct file *filp)
 {
        struct ftrace_event_call *call = inode->i_private;
 
-       return ftrace_event_call_get(call);
+       return ftrace_event_call_get(call, filp->f_dentry);
 }
 
 static int tracing_release_generic_call(struct inode *inode, struct file *filp)
@@ -1007,7 +1047,7 @@ static int trace_format_open(struct inode *inode, struct 
file *file)
        struct seq_file *m;
        int ret;
 
-       ret = ftrace_event_call_get(call);
+       ret = ftrace_event_call_get(call, file->f_dentry);
        if (ret < 0)
                return ret;
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to