Merged with some modifications in master, stable-2.10, stable-2.9, and stable-2.8. Read on for changes.
On 6 February 2017 at 15:28, Jonathan Rajotte <[email protected]> wrote: > A session teardown can be initiated by a dying application. Hence, a > session object can exist without a valid registry. As a result, > get_session_registry can return null. To prevent this, the UST > application session lock should be held, when possible, when looking up > the registry to ensure synchronization. Otherwise the presence of a > registry is not guaranteed. In such case, handling a null return value > from look-up registry function is necessary. > > Core dumps, triggered by the "assert(registry)" statement found in > reply_ust_register_channel, were observed when killing instrumented > applications. In this occurrence, obtaining the UST application lock > result in a deadlock since the lock is already held during > ust_app_global_create. Handling the null value is simpler and > corresponds with the handling of previous look-up done during the > function. > > Handling of null value is also applied to: > add_event_ust_registry > add_enum_ust_registry > ust_app_snapshot_record > > Signed-off-by: Jonathan Rajotte <[email protected]> > --- > src/bin/lttng-sessiond/ust-app.c | 53 > ++++++++++++++++++++++++++++++++++------ > 1 file changed, 45 insertions(+), 8 deletions(-) > > diff --git a/src/bin/lttng-sessiond/ust-app.c > b/src/bin/lttng-sessiond/ust-app.c > index 5a41c38..4395a59 100644 > --- a/src/bin/lttng-sessiond/ust-app.c > +++ b/src/bin/lttng-sessiond/ust-app.c > @@ -810,6 +810,7 @@ void delete_ust_app_session(int sock, struct > ust_app_session *ua_sess, > ua_sess->deleted = true; > > registry = get_session_registry(ua_sess); > + /* Registry can be null on error path during initialization */ > if (registry) { > /* Push metadata for application before freeing the > application. */ > (void) push_metadata(registry, ua_sess->consumer); > @@ -837,6 +838,10 @@ void delete_ust_app_session(int sock, struct > ust_app_session *ua_sess, > if (ua_sess->buffer_type == LTTNG_BUFFER_PER_PID) { > struct buffer_reg_pid *reg_pid = > buffer_reg_pid_find(ua_sess->id); > if (reg_pid) { > + /* > + * Registry can be null on error path during > + * initialization Added dots at the end of comments. > + */ > buffer_reg_pid_remove(reg_pid); > buffer_reg_pid_destroy(reg_pid); > } > @@ -2464,6 +2469,8 @@ error: > /* > * Ask the consumer to create a channel and get it if successful. > * > + * Called with UST app session lock held. > + * > * Return 0 on success or else a negative value. > */ > static int do_consumer_create_channel(struct ltt_ust_session *usess, > @@ -2917,6 +2924,8 @@ error: > /* > * Create and send to the application the created buffers with per PID > buffers. > * > + * Called with UST app session lock held. > + * > * Return 0 on success else a negative value. > */ > static int create_channel_per_pid(struct ust_app *app, > @@ -2936,6 +2945,7 @@ static int create_channel_per_pid(struct ust_app *app, > rcu_read_lock(); > > registry = get_session_registry(ua_sess); > + /* The UST app session lock is held, registry shall not be null */ > assert(registry); > > /* Create and add a new channel registry to session. */ > @@ -2973,6 +2983,8 @@ error: > * need and send it to the application. This MUST be called with a RCU read > * side lock acquired. > * > + * Called with UST app session lock held. > + * > * Return 0 on success or else a negative value. Returns -ENOTCONN if > * the application exited concurrently. > */ > @@ -3160,6 +3172,7 @@ static int create_ust_app_metadata(struct > ust_app_session *ua_sess, > assert(consumer); > > registry = get_session_registry(ua_sess); > + /* The UST app session is held registry shall not be null */ > assert(registry); > > pthread_mutex_lock(®istry->lock); > @@ -4296,6 +4309,9 @@ int ust_app_create_event_glb(struct ltt_ust_session > *usess, > > /* > * Start tracing for a specific UST session and app. > + * > + * Called with UST app session lock held. > + * > */ > static > int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app) > @@ -4479,6 +4495,8 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, > struct ust_app *app) > health_code_update(); > > registry = get_session_registry(ua_sess); > + > + /* The UST app session is held registry shall not be null */ > assert(registry); > > /* Push metadata for application before freeing the application. */ > @@ -5320,7 +5338,7 @@ static int reply_ust_register_channel(int sock, int > sobjd, int cobjd, > /* Lookup application. If not found, there is a code flow error. */ > app = find_app_by_notify_sock(sock); > if (!app) { > - DBG("Application socket %d is being teardown. Abort event > notify", > + DBG("Application socket %d is being torn down. Abort event > notify", > sock); > ret = 0; > free(fields); > @@ -5330,7 +5348,7 @@ static int reply_ust_register_channel(int sock, int > sobjd, int cobjd, > /* Lookup channel by UST object descriptor. */ > ua_chan = find_channel_by_objd(app, cobjd); > if (!ua_chan) { > - DBG("Application channel is being teardown. Abort event > notify"); > + DBG("Application channel is being torn down. Abort event > notify"); > ret = 0; > free(fields); > goto error_rcu_unlock; > @@ -5341,7 +5359,12 @@ static int reply_ust_register_channel(int sock, int > sobjd, int cobjd, > > /* Get right session registry depending on the session buffer type. */ > registry = get_session_registry(ua_sess); > - assert(registry); > + if (!registry) { > + DBG("Application session is being torn down. Abort event > notify"); > + ret = 0; > + free(fields); I reworked the various free(fields) to perform the free at the end of the function, and set "fields" to NULL only once the ownership has been transfered. > + goto error_rcu_unlock; > + }; > > /* Depending on the buffer type, a different channel key is used. */ > if (ua_sess->buffer_type == LTTNG_BUFFER_PER_UID) { > @@ -5439,7 +5462,7 @@ static int add_event_ust_registry(int sock, int sobjd, > int cobjd, char *name, > /* Lookup application. If not found, there is a code flow error. */ > app = find_app_by_notify_sock(sock); > if (!app) { > - DBG("Application socket %d is being teardown. Abort event > notify", > + DBG("Application socket %d is being torn down. Abort event > notify", > sock); > ret = 0; > free(sig); > @@ -5451,7 +5474,7 @@ static int add_event_ust_registry(int sock, int sobjd, > int cobjd, char *name, > /* Lookup channel by UST object descriptor. */ > ua_chan = find_channel_by_objd(app, cobjd); > if (!ua_chan) { > - DBG("Application channel is being teardown. Abort event > notify"); > + DBG("Application channel is being torn down. Abort event > notify"); > ret = 0; > free(sig); > free(fields); > @@ -5463,7 +5486,14 @@ static int add_event_ust_registry(int sock, int sobjd, > int cobjd, char *name, > ua_sess = ua_chan->session; > > registry = get_session_registry(ua_sess); > - assert(registry); > + if (!registry) { > + DBG("Application session is being torn down. Abort event > notify"); > + ret = 0; > + free(sig); > + free(fields); > + free(model_emf_uri); Same idea than the modifications done for the "fields" memory ownership transfer. > + goto error_rcu_unlock; > + } > > if (ua_sess->buffer_type == LTTNG_BUFFER_PER_UID) { > chan_reg_key = ua_chan->tracing_channel_id; > @@ -5551,7 +5581,11 @@ static int add_enum_ust_registry(int sock, int sobjd, > char *name, > } > > registry = get_session_registry(ua_sess); > - assert(registry); > + if (!registry) { > + DBG("Application session is being torn down. Aborting enum > registration."); Modified this logging message to indicate the cause (null registry vs null session). > + free(entries); > + goto error_rcu_unlock; > + } > > pthread_mutex_lock(®istry->lock); > > @@ -5917,7 +5951,10 @@ int ust_app_snapshot_record(struct ltt_ust_session > *usess, > } > > registry = get_session_registry(ua_sess); > - assert(registry); > + if (!registry) { > + DBG("Application session is being torn down. > Abort snapshot record."); > + goto error; Added ret = -1 here to indicate the error. Also, ret could be left to an unexpected value if the ua_sess->channels->ht is empty (it would take the return value of the snprintf() call before). > + } > ret = consumer_snapshot_channel(socket, > registry->metadata_key, output, > 1, ua_sess->euid, ua_sess->egid, > pathname, wait, 0); > if (ret < 0) { > -- > 2.7.4 > -- Jérémie Galarneau EfficiOS Inc. http://www.efficios.com _______________________________________________ lttng-dev mailing list [email protected] https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev
