merged.

On Wed, 27 Apr 2011, Jason Wessel wrote:

The ust-consumerd gets shut down by the SIGTERM signal, and a number of
places in the ust-consumerd did not properly deal with the case where
a system call fails with errno set to EINTR because a signal was
delivered to the process.  The failure to handle EINTR properly was
leading to some data corruption in the buffer code and causing some
random "victim" crashes in lowlevel.c.

All of the offending functions were tracked down by temporarily adding
an abort() to the SIGTERM signal handler.  Then it was a matter of
looking at which threads, other than the thread that received the
signal, were blocked in system calls at that moment.

Signed-off-by: Jason Wessel <[email protected]>
---
libustconsumer/libustconsumer.c |   25 +++++++++++++++++++------
ust-consumerd/ust-consumerd.c   |   11 ++++++++++-
2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/libustconsumer/libustconsumer.c b/libustconsumer/libustconsumer.c
index c5acffa..6f6d4bb 100644
--- a/libustconsumer/libustconsumer.c
+++ b/libustconsumer/libustconsumer.c
@@ -477,6 +477,8 @@ int consumer_loop(struct ustconsumer_instance *instance, 
struct buffer_info *buf
                        DBG("App died while being traced");
                        finish_consuming_dead_subbuffer(instance->callbacks, 
buf);
                        break;
+               } else if (read_result == -1 && errno == EINTR) {
+                       continue;
                }

                if(instance->callbacks->on_read_subbuffer)
@@ -783,8 +785,11 @@ int ustconsumer_stop_instance(struct ustconsumer_instance 
*instance, int send_ms

        struct sockaddr_un addr;

+socket_again:
        result = fd = socket(PF_UNIX, SOCK_STREAM, 0);
        if(result == -1) {
+               if (errno == EINTR)
+                       goto socket_again;
                PERROR("socket");
                return 1;
        }
@@ -794,13 +799,21 @@ int ustconsumer_stop_instance(struct ustconsumer_instance 
*instance, int send_ms
        strncpy(addr.sun_path, instance->sock_path, UNIX_PATH_MAX);
        addr.sun_path[UNIX_PATH_MAX-1] = '\0';

-       result = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
-       if(result == -1) {
-               PERROR("connect");
-       }
+connect_again:
+               result = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
+               if(result == -1) {
+                       if (errno == EINTR)
+                               goto connect_again;
+                       PERROR("connect");
+               }

-       while(bytes != sizeof(msg))
-               bytes += send(fd, msg, sizeof(msg), 0);
+       while(bytes != sizeof(msg)) {
+               int inc = send(fd, msg, sizeof(msg), 0);
+               if (inc < 0 && errno != EINTR)
+                       break;
+               else
+                       bytes += inc;
+       }

        close(fd);

diff --git a/ust-consumerd/ust-consumerd.c b/ust-consumerd/ust-consumerd.c
index ce2ee40..c961394 100644
--- a/ust-consumerd/ust-consumerd.c
+++ b/ust-consumerd/ust-consumerd.c
@@ -210,7 +210,11 @@ int on_open_buffer(struct ustconsumer_callbacks *data, 
struct buffer_info *buf)
                    trace_path, buf->pid, buf->pidunique, buf->name);
                return 1;
        }
+again:
        result = fd = open(tmp, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 00600);
+       if (result == -1 && errno == EINTR)
+               goto again;
+
        if(result == -1) {
                PERROR("open");
                ERR("failed opening trace file %s", tmp);
@@ -225,7 +229,12 @@ int on_open_buffer(struct ustconsumer_callbacks *data, 
struct buffer_info *buf)
int on_close_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
{
        struct buffer_info_local *buf_local = buf->user_data;
-       int result = close(buf_local->file_fd);
+       int result;
+
+again:
+       result = close(buf_local->file_fd);
+       if (result == -1 && errno == EINTR)
+               goto again;
        free(buf_local);
        if(result == -1) {
                PERROR("close");
--
1.7.1


_______________________________________________
ltt-dev mailing list
[email protected]
http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev


_______________________________________________
ltt-dev mailing list
[email protected]
http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev

Reply via email to