Hi Jiri,

On Mon, Jul 16, 2018 at 12:29:34PM +0200, Jiri Olsa wrote:
> On Sun, Jul 15, 2018 at 10:08:27PM +0900, Namhyung Kim wrote:
> 
> SNIP
> 
> > > Because thread 2 first decrements the refcnt and only after then it
> > > removes the struct comm_str from the list, the thread 1 can find this
> > > object on the list with refcnt equls to 0 and hit the assert.
> > > 
> > > This patch fixes the thread 2 path, by removing the struct comm_str
> > > FIRST from the list and only AFTER calling comm_str__put on it. This
> > > way the thread 1 finds only valid objects on the list.
> > 
> > I'm not sure we can unconditionally remove the comm_str from the tree.
> > It should be removed only if refcount is going to zero IMHO.
> > Otherwise it could end up having multiple comm_str entry for a same
> > name.
> 
> right, but it wouldn't crash ;-)
> 
> how about attached change, that actualy deals with the refcnt
> race I'm running the tests now, seems ok so far

I think we can keep if the refcount is back to non-zero.  What about this?
(not tested..)


static struct comm_str *comm_str__get(cs)
{
    if (cs)
        refcount_inc_no_warn(&cs->refcnt);  // should be added
    return cs;
}

static void comm_str__put(cs)
{
    if (cs && refcount_dec_and_test(&cs->refcnt)) {
        down_write(&comm_str_lock);
        /* might race with comm_str__findnew() */
        if (!refcount_read(&cs->refcnt)) {
            rb_erase(&cs->rb_node, &comm_str_root);
            zfree(&cs->str);
            free(cs);
        }
        up_write(&comm_str_lock);
    }
}


Thanks,
Namhyung


> 
> 
> ---
> diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
> index 7798a2cc8a86..592b03548021 100644
> --- a/tools/perf/util/comm.c
> +++ b/tools/perf/util/comm.c
> @@ -18,22 +18,27 @@ struct comm_str {
>  static struct rb_root comm_str_root;
>  static struct rw_semaphore comm_str_lock = {.lock = 
> PTHREAD_RWLOCK_INITIALIZER,};
>  
> -static struct comm_str *comm_str__get(struct comm_str *cs)
> +static bool comm_str__get(struct comm_str *cs)
>  {
> -     if (cs)
> -             refcount_inc(&cs->refcnt);
> -     return cs;
> +     return cs ? refcount_inc_not_zero(&cs->refcnt) : false;
>  }
>  
> -static void comm_str__put(struct comm_str *cs)
> +static int comm_str__put(struct comm_str *cs, bool lock)
>  {
> -     if (cs && refcount_dec_and_test(&cs->refcnt)) {
> +     if (!cs || !refcount_dec_and_test(&cs->refcnt))
> +             return 0;
> +
> +     if (lock)
>               down_write(&comm_str_lock);
> -             rb_erase(&cs->rb_node, &comm_str_root);
> +
> +     rb_erase(&cs->rb_node, &comm_str_root);
> +
> +     if (lock)
>               up_write(&comm_str_lock);
> -             zfree(&cs->str);
> -             free(cs);
> -     }
> +
> +     zfree(&cs->str);
> +     free(cs);
> +     return 1;
>  }
>  
>  static struct comm_str *comm_str__alloc(const char *str)
> @@ -67,9 +72,22 @@ struct comm_str *__comm_str__findnew(const char *str, 
> struct rb_root *root)
>               parent = *p;
>               iter = rb_entry(parent, struct comm_str, rb_node);
>  
> -             cmp = strcmp(str, iter->str);
> -             if (!cmp)
> -                     return comm_str__get(iter);
> +             /*
> +              * If we race with comm_str__put, iter->refcnt == 0
> +              * and it will be removed within comm_str__put
> +              * thread shortly, ignore it in this search.
> +              */
> +             if (comm_str__get(iter)) {
> +                     cmp = strcmp(str, iter->str);
> +                     if (!cmp)
> +                             return iter;
> +                     /*
> +                      * If we actualy had to remove the item, restart
> +                      * the search to have the clean tree search.
> +                      */
> +                     if (comm_str__put(iter, false))
> +                             return __comm_str__findnew(str, root);
> +             }
>  
>               if (cmp < 0)
>                       p = &(*p)->rb_left;
> @@ -125,7 +143,7 @@ int comm__override(struct comm *comm, const char *str, 
> u64 timestamp, bool exec)
>       if (!new)
>               return -ENOMEM;
>  
> -     comm_str__put(old);
> +     comm_str__put(old, true);
>       comm->comm_str = new;
>       comm->start = timestamp;
>       if (exec)
> @@ -136,7 +154,7 @@ int comm__override(struct comm *comm, const char *str, 
> u64 timestamp, bool exec)
>  
>  void comm__free(struct comm *comm)
>  {
> -     comm_str__put(comm->comm_str);
> +     comm_str__put(comm->comm_str, true);
>       free(comm);
>  }
>  

Reply via email to