On Sun, Jul 15, 2018 at 10:08:27PM +0900, Namhyung Kim wrote:

SNIP

> > Because thread 2 first decrements the refcnt and only after then it
> > removes the struct comm_str from the list, the thread 1 can find this
> > object on the list with refcnt equls to 0 and hit the assert.
> > 
> > This patch fixes the thread 2 path, by removing the struct comm_str
> > FIRST from the list and only AFTER calling comm_str__put on it. This
> > way the thread 1 finds only valid objects on the list.
> 
> I'm not sure we can unconditionally remove the comm_str from the tree.
> It should be removed only if refcount is going to zero IMHO.
> Otherwise it could end up having multiple comm_str entry for a same
> name.

right, but it wouldn't crash ;-)

how about attached change, that actualy deals with the refcnt
race I'm running the tests now, seems ok so far

jirka


---
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 7798a2cc8a86..592b03548021 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -18,22 +18,27 @@ struct comm_str {
 static struct rb_root comm_str_root;
 static struct rw_semaphore comm_str_lock = {.lock = 
PTHREAD_RWLOCK_INITIALIZER,};
 
-static struct comm_str *comm_str__get(struct comm_str *cs)
+static bool comm_str__get(struct comm_str *cs)
 {
-       if (cs)
-               refcount_inc(&cs->refcnt);
-       return cs;
+       return cs ? refcount_inc_not_zero(&cs->refcnt) : false;
 }
 
-static void comm_str__put(struct comm_str *cs)
+static int comm_str__put(struct comm_str *cs, bool lock)
 {
-       if (cs && refcount_dec_and_test(&cs->refcnt)) {
+       if (!cs || !refcount_dec_and_test(&cs->refcnt))
+               return 0;
+
+       if (lock)
                down_write(&comm_str_lock);
-               rb_erase(&cs->rb_node, &comm_str_root);
+
+       rb_erase(&cs->rb_node, &comm_str_root);
+
+       if (lock)
                up_write(&comm_str_lock);
-               zfree(&cs->str);
-               free(cs);
-       }
+
+       zfree(&cs->str);
+       free(cs);
+       return 1;
 }
 
 static struct comm_str *comm_str__alloc(const char *str)
@@ -67,9 +72,22 @@ struct comm_str *__comm_str__findnew(const char *str, struct 
rb_root *root)
                parent = *p;
                iter = rb_entry(parent, struct comm_str, rb_node);
 
-               cmp = strcmp(str, iter->str);
-               if (!cmp)
-                       return comm_str__get(iter);
+               /*
+                * If we race with comm_str__put, iter->refcnt == 0
+                * and it will be removed within comm_str__put
+                * thread shortly, ignore it in this search.
+                */
+               if (comm_str__get(iter)) {
+                       cmp = strcmp(str, iter->str);
+                       if (!cmp)
+                               return iter;
+                       /*
+                        * If we actualy had to remove the item, restart
+                        * the search to have the clean tree search.
+                        */
+                       if (comm_str__put(iter, false))
+                               return __comm_str__findnew(str, root);
+               }
 
                if (cmp < 0)
                        p = &(*p)->rb_left;
@@ -125,7 +143,7 @@ int comm__override(struct comm *comm, const char *str, u64 
timestamp, bool exec)
        if (!new)
                return -ENOMEM;
 
-       comm_str__put(old);
+       comm_str__put(old, true);
        comm->comm_str = new;
        comm->start = timestamp;
        if (exec)
@@ -136,7 +154,7 @@ int comm__override(struct comm *comm, const char *str, u64 
timestamp, bool exec)
 
 void comm__free(struct comm *comm)
 {
-       comm_str__put(comm->comm_str);
+       comm_str__put(comm->comm_str, true);
        free(comm);
 }
 

Reply via email to