On Sun, Jul 15, 2018 at 10:08:27PM +0900, Namhyung Kim wrote: SNIP
> > Because thread 2 first decrements the refcnt and only after then it > > removes the struct comm_str from the list, the thread 1 can find this > > object on the list with refcnt equls to 0 and hit the assert. > > > > This patch fixes the thread 2 path, by removing the struct comm_str > > FIRST from the list and only AFTER calling comm_str__put on it. This > > way the thread 1 finds only valid objects on the list. > > I'm not sure we can unconditionally remove the comm_str from the tree. > It should be removed only if refcount is going to zero IMHO. > Otherwise it could end up having multiple comm_str entry for a same > name. right, but it wouldn't crash ;-) how about attached change, that actualy deals with the refcnt race I'm running the tests now, seems ok so far jirka --- diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 7798a2cc8a86..592b03548021 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -18,22 +18,27 @@ struct comm_str { static struct rb_root comm_str_root; static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,}; -static struct comm_str *comm_str__get(struct comm_str *cs) +static bool comm_str__get(struct comm_str *cs) { - if (cs) - refcount_inc(&cs->refcnt); - return cs; + return cs ? refcount_inc_not_zero(&cs->refcnt) : false; } -static void comm_str__put(struct comm_str *cs) +static int comm_str__put(struct comm_str *cs, bool lock) { - if (cs && refcount_dec_and_test(&cs->refcnt)) { + if (!cs || !refcount_dec_and_test(&cs->refcnt)) + return 0; + + if (lock) down_write(&comm_str_lock); - rb_erase(&cs->rb_node, &comm_str_root); + + rb_erase(&cs->rb_node, &comm_str_root); + + if (lock) up_write(&comm_str_lock); - zfree(&cs->str); - free(cs); - } + + zfree(&cs->str); + free(cs); + return 1; } static struct comm_str *comm_str__alloc(const char *str) @@ -67,9 +72,22 @@ struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root) parent = *p; iter = rb_entry(parent, struct comm_str, rb_node); - cmp = strcmp(str, iter->str); - if (!cmp) - return comm_str__get(iter); + /* + * If we race with comm_str__put, iter->refcnt == 0 + * and it will be removed within comm_str__put + * thread shortly, ignore it in this search. + */ + if (comm_str__get(iter)) { + cmp = strcmp(str, iter->str); + if (!cmp) + return iter; + /* + * If we actualy had to remove the item, restart + * the search to have the clean tree search. + */ + if (comm_str__put(iter, false)) + return __comm_str__findnew(str, root); + } if (cmp < 0) p = &(*p)->rb_left; @@ -125,7 +143,7 @@ int comm__override(struct comm *comm, const char *str, u64 timestamp, bool exec) if (!new) return -ENOMEM; - comm_str__put(old); + comm_str__put(old, true); comm->comm_str = new; comm->start = timestamp; if (exec) @@ -136,7 +154,7 @@ int comm__override(struct comm *comm, const char *str, u64 timestamp, bool exec) void comm__free(struct comm *comm) { - comm_str__put(comm->comm_str); + comm_str__put(comm->comm_str, true); free(comm); }