Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Wed, Sep 10, 2014 at 03:57:09PM +0200, Peter Zijlstra wrote: > On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote: > > > > now I need to recall what I used to test this ;-) > > > > Hint, if you'd mentioned that in the changelog :-) will try much better this time.. perf test attached ;-) seems like the child state fix is a good one jirka --- >From dc1068c627b332b2a5feb4deedfd96db8e056b5f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 10 Sep 2014 16:23:51 +0200 Subject: [PATCH] perf tests: Add test for optimized sched switch bug --- tools/perf/Makefile.perf|1 + tools/perf/tests/builtin-test.c |4 + tools/perf/tests/optimized_switch.c | 169 +++ tools/perf/tests/tests.h|1 + 4 files changed, 175 insertions(+), 0 deletions(-) create mode 100644 tools/perf/tests/optimized_switch.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index da8fc07..75603ac 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -427,6 +427,7 @@ LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o LIB_OBJS += $(OUTPUT)tests/switch-tracking.o LIB_OBJS += $(OUTPUT)tests/xyarray.o +LIB_OBJS += $(OUTPUT)tests/optimized_switch.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o BUILTIN_OBJS += $(OUTPUT)builtin-bench.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 5c90326..e092d17 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -162,6 +162,10 @@ static struct test { .func = test__xyarray, }, { + .desc = "Test optimized switch", + .func = test__optimized_switch, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/optimized_switch.c b/tools/perf/tests/optimized_switch.c new file mode 100644 index 000..68f8705 --- /dev/null +++ b/tools/perf/tests/optimized_switch.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include +#include "tests.h" +#include "debug.h" +#include "evsel.h" +#include "evlist.h" +#include "thread_map.h" + +#define GROUPS 10 +#define WORKERS10 +#define BYTES 100 + +struct worker { + union { + int pipefd[2]; + struct { + int read; + int write; + } fd; + }; +}; + +#define pr_errsys(s) \ + pr_err(s " failed: %s\n", strerror(errno)) + +static void worker(int fd) +{ + unsigned char c = 0; + int cnt = BYTES; + ssize_t r; + + while (cnt--) { + r = read(fd, , sizeof(c)); + if (r != 1) { + pr_errsys("read"); + exit(-1); + } + } + + prctl(0, 0, 0, 0, 0); + exit(0); +} + +static void write_round(struct worker *workers) +{ + unsigned char c = 0; + ssize_t r; + int i, j; + + for (j = 0; j < BYTES; j++) { + for (i = 0; i < WORKERS; i++) { + struct worker *w = [i]; + + r = write(w->fd.write, , sizeof(c)); + if (r != 1) { + pr_errsys("write"); + exit(-1); + } + } + } +} + +static int group(void) +{ + struct worker workers[WORKERS]; + int i; + + for (i = 0; i < WORKERS; i++) { + struct worker *w = [i]; + int pid; + + if (pipe(w->pipefd)) { + pr_errsys("pipe"); + return -1; + } + + pid = fork(); + if (pid == -1) { + pr_errsys("fork"); + return -1; + } else if (!pid) { + worker(w->fd.read); + } + } + + write_round(workers); + + for (i = 0; i < WORKERS; i++) + wait(NULL); + + return 0; +} + +static int child(void) +{ + int i; + + for (i = 0; i < GROUPS; i++) + TEST_ASSERT_VAL("group failed", !group()); + + return 0; +} + +int test__optimized_switch(void) +{ + ssize_t r __maybe_unused; + unsigned char c = 0; + int ready[2], pid; + struct thread_map *threads; + struct perf_evsel *evsel; + u64 total; + + if (pipe(ready)) { + pr_errsys("pipe"); + return -1; + } + + pid = fork(); + if (pid == -1) { + pr_errsys("fork"); + return -1; + } + + if (!pid) { + r = read(ready[0], , sizeof(c)); + if (r != 1) { + pr_errsys("read"); + exit(-1); + } + + exit(child()); + } + + threads = thread_map__new(-1, pid,
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote: > > now I need to recall what I used to test this ;-) > Hint, if you'd mentioned that in the changelog :-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote: now I need to recall what I used to test this ;-) Hint, if you'd mentioned that in the changelog :-) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Wed, Sep 10, 2014 at 03:57:09PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote: now I need to recall what I used to test this ;-) Hint, if you'd mentioned that in the changelog :-) will try much better this time.. perf test attached ;-) seems like the child state fix is a good one jirka --- From dc1068c627b332b2a5feb4deedfd96db8e056b5f Mon Sep 17 00:00:00 2001 From: Jiri Olsa jo...@redhat.com Date: Wed, 10 Sep 2014 16:23:51 +0200 Subject: [PATCH] perf tests: Add test for optimized sched switch bug --- tools/perf/Makefile.perf|1 + tools/perf/tests/builtin-test.c |4 + tools/perf/tests/optimized_switch.c | 169 +++ tools/perf/tests/tests.h|1 + 4 files changed, 175 insertions(+), 0 deletions(-) create mode 100644 tools/perf/tests/optimized_switch.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index da8fc07..75603ac 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -427,6 +427,7 @@ LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o LIB_OBJS += $(OUTPUT)tests/switch-tracking.o LIB_OBJS += $(OUTPUT)tests/xyarray.o +LIB_OBJS += $(OUTPUT)tests/optimized_switch.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o BUILTIN_OBJS += $(OUTPUT)builtin-bench.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 5c90326..e092d17 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -162,6 +162,10 @@ static struct test { .func = test__xyarray, }, { + .desc = Test optimized switch, + .func = test__optimized_switch, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/optimized_switch.c b/tools/perf/tests/optimized_switch.c new file mode 100644 index 000..68f8705 --- /dev/null +++ b/tools/perf/tests/optimized_switch.c @@ -0,0 +1,169 @@ +#include unistd.h +#include stdlib.h +#include sys/prctl.h +#include linux/compiler.h +#include tests.h +#include debug.h +#include evsel.h +#include evlist.h +#include thread_map.h + +#define GROUPS 10 +#define WORKERS10 +#define BYTES 100 + +struct worker { + union { + int pipefd[2]; + struct { + int read; + int write; + } fd; + }; +}; + +#define pr_errsys(s) \ + pr_err(s failed: %s\n, strerror(errno)) + +static void worker(int fd) +{ + unsigned char c = 0; + int cnt = BYTES; + ssize_t r; + + while (cnt--) { + r = read(fd, c, sizeof(c)); + if (r != 1) { + pr_errsys(read); + exit(-1); + } + } + + prctl(0, 0, 0, 0, 0); + exit(0); +} + +static void write_round(struct worker *workers) +{ + unsigned char c = 0; + ssize_t r; + int i, j; + + for (j = 0; j BYTES; j++) { + for (i = 0; i WORKERS; i++) { + struct worker *w = workers[i]; + + r = write(w-fd.write, c, sizeof(c)); + if (r != 1) { + pr_errsys(write); + exit(-1); + } + } + } +} + +static int group(void) +{ + struct worker workers[WORKERS]; + int i; + + for (i = 0; i WORKERS; i++) { + struct worker *w = workers[i]; + int pid; + + if (pipe(w-pipefd)) { + pr_errsys(pipe); + return -1; + } + + pid = fork(); + if (pid == -1) { + pr_errsys(fork); + return -1; + } else if (!pid) { + worker(w-fd.read); + } + } + + write_round(workers); + + for (i = 0; i WORKERS; i++) + wait(NULL); + + return 0; +} + +static int child(void) +{ + int i; + + for (i = 0; i GROUPS; i++) + TEST_ASSERT_VAL(group failed, !group()); + + return 0; +} + +int test__optimized_switch(void) +{ + ssize_t r __maybe_unused; + unsigned char c = 0; + int ready[2], pid; + struct thread_map *threads; + struct perf_evsel *evsel; + u64 total; + + if (pipe(ready)) { + pr_errsys(pipe); + return -1; + } + + pid = fork(); + if (pid == -1) { + pr_errsys(fork); + return -1; + } + + if (!pid) { + r = read(ready[0], c, sizeof(c)); + if (r != 1) { + pr_errsys(read); + exit(-1); + } + + exit(child()); + } + + threads =
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote: > I just noticed that we initialize the child state with base parent > state not the real (immediate) parent.. which is what we want IMO > > I wonder attached patch could fix the issue mentioned in: > 1f9a726 perf: Do not allow optimized switch for non-cloned events > > now I need to recall what I used to test this ;-) Ah, very nice! Yes this might just do it. > --- > diff --git a/kernel/events/core.c b/kernel/events/core.c > index e4d6924..561a4ea 100644 > --- a/kernel/events/core.c > +++ b/kernel/events/core.c > @@ -7794,6 +7794,7 @@ inherit_event(struct perf_event *parent_event, > struct perf_event *group_leader, > struct perf_event_context *child_ctx) > { > + enum perf_event_active_state parent_state = parent_event->state; > struct perf_event *child_event; > unsigned long flags; > > @@ -7827,7 +7828,7 @@ inherit_event(struct perf_event *parent_event, >* not its attr.disabled bit. We hold the parent's mutex, >* so we won't race with perf_event_{en, dis}able_family. >*/ > - if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) > + if (parent_state >= PERF_EVENT_STATE_INACTIVE) > child_event->state = PERF_EVENT_STATE_INACTIVE; > else > child_event->state = PERF_EVENT_STATE_OFF; pgpyUo3adzwlP.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote: I just noticed that we initialize the child state with base parent state not the real (immediate) parent.. which is what we want IMO I wonder attached patch could fix the issue mentioned in: 1f9a726 perf: Do not allow optimized switch for non-cloned events now I need to recall what I used to test this ;-) Ah, very nice! Yes this might just do it. --- diff --git a/kernel/events/core.c b/kernel/events/core.c index e4d6924..561a4ea 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7794,6 +7794,7 @@ inherit_event(struct perf_event *parent_event, struct perf_event *group_leader, struct perf_event_context *child_ctx) { + enum perf_event_active_state parent_state = parent_event-state; struct perf_event *child_event; unsigned long flags; @@ -7827,7 +7828,7 @@ inherit_event(struct perf_event *parent_event, * not its attr.disabled bit. We hold the parent's mutex, * so we won't race with perf_event_{en, dis}able_family. */ - if (parent_event-state = PERF_EVENT_STATE_INACTIVE) + if (parent_state = PERF_EVENT_STATE_INACTIVE) child_event-state = PERF_EVENT_STATE_INACTIVE; else child_event-state = PERF_EVENT_STATE_OFF; pgpyUo3adzwlP.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 05:13:05PM +0200, Peter Zijlstra wrote: > On Mon, Sep 08, 2014 at 03:34:28PM +0200, Peter Zijlstra wrote: > > On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote: > > > On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: > > > > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: > > > > > > > > > > The thing is; I don't understand those reasons. That commit log > > > > > > doesn't > > > > > > explain. > > > > > > > > > > Ah wait, I finally see. I think we want to fix that exit path, not > > > > > disallow the cloning. > > > > > > > > > > The thing is, by not allowing this optimization simple things like eg. > > > > > pipe-test say very expensive. > > > > > > > > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events > > > > with exited task") that introduces the problem. Before that things would > > > > work correctly afaict. > > > > > > hum, I dont think so.. because the perf_remove_from_context set event > > > to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events > > > disabled > > > > Urgh, see I knew I was missing something. > > > > Can't we fix that? Lemme check to see what relies on this. > > 2e2af50b1fab ("perf_events: Disable events when we detach them") > > Seems to be about it. And I think we should solve that differently, but > the best I can come up with ties into the event->ctx mess we have in > that other thread. > > The thing is, IOC_ENABLE/DISABLE and read() and such should act > (sanely and) independent from the attached state. > > Its just that the whole event->ctx migration mess is making this > somewhat hard atm. > > So things like perf_event_read() should not only check ctx->is_active > but also worry about event->attach_state & PERF_ATTACH_CONTEXT. > > Now the biggest problem is that we cannot tell if its a temporary state > (move_group / migrate_context) or permanent (exit)... > > Urgh I just noticed that we initialize the child state with base parent state not the real (immediate) parent.. which is what we want IMO I wonder attached patch could fix the issue mentioned in: 1f9a726 perf: Do not allow optimized switch for non-cloned events now I need to recall what I used to test this ;-) jirka --- diff --git a/kernel/events/core.c b/kernel/events/core.c index e4d6924..561a4ea 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7794,6 +7794,7 @@ inherit_event(struct perf_event *parent_event, struct perf_event *group_leader, struct perf_event_context *child_ctx) { + enum perf_event_active_state parent_state = parent_event->state; struct perf_event *child_event; unsigned long flags; @@ -7827,7 +7828,7 @@ inherit_event(struct perf_event *parent_event, * not its attr.disabled bit. We hold the parent's mutex, * so we won't race with perf_event_{en, dis}able_family. */ - if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) + if (parent_state >= PERF_EVENT_STATE_INACTIVE) child_event->state = PERF_EVENT_STATE_INACTIVE; else child_event->state = PERF_EVENT_STATE_OFF; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 03:34:28PM +0200, Peter Zijlstra wrote: > On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote: > > On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: > > > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: > > > > > > > > The thing is; I don't understand those reasons. That commit log > > > > > doesn't > > > > > explain. > > > > > > > > Ah wait, I finally see. I think we want to fix that exit path, not > > > > disallow the cloning. > > > > > > > > The thing is, by not allowing this optimization simple things like eg. > > > > pipe-test say very expensive. > > > > > > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events > > > with exited task") that introduces the problem. Before that things would > > > work correctly afaict. > > > > hum, I dont think so.. because the perf_remove_from_context set event > > to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events > > disabled > > Urgh, see I knew I was missing something. > > Can't we fix that? Lemme check to see what relies on this. 2e2af50b1fab ("perf_events: Disable events when we detach them") Seems to be about it. And I think we should solve that differently, but the best I can come up with ties into the event->ctx mess we have in that other thread. The thing is, IOC_ENABLE/DISABLE and read() and such should act (sanely and) independent from the attached state. Its just that the whole event->ctx migration mess is making this somewhat hard atm. So things like perf_event_read() should not only check ctx->is_active but also worry about event->attach_state & PERF_ATTACH_CONTEXT. Now the biggest problem is that we cannot tell if its a temporary state (move_group / migrate_context) or permanent (exit)... Urgh pgpp92w5dLyFX.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote: > On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: > > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: > > > > > > The thing is; I don't understand those reasons. That commit log doesn't > > > > explain. > > > > > > Ah wait, I finally see. I think we want to fix that exit path, not > > > disallow the cloning. > > > > > > The thing is, by not allowing this optimization simple things like eg. > > > pipe-test say very expensive. > > > > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events > > with exited task") that introduces the problem. Before that things would > > work correctly afaict. > > hum, I dont think so.. because the perf_remove_from_context set event > to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events > disabled Urgh, see I knew I was missing something. Can't we fix that? Lemme check to see what relies on this. pgpPkoqTrGmCQ.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 02:19:52PM +0200, Jiri Olsa wrote: > > I have another 'problem' with 179033b3e064. What if you 'want' to > > continue monitoring after the initial task died? Eg. if you want to > > monitor crap that unconditionally daemonizes. > > right.. did not think of that.. need to check more, but > seems like just the check for children should be enough > Indeed, that should work. > --- > diff --git a/kernel/events/core.c b/kernel/events/core.c > index bf482ccbdbe1..341d0b47ca14 100644 > --- a/kernel/events/core.c > +++ b/kernel/events/core.c > @@ -3568,6 +3568,19 @@ static int perf_event_read_one(struct perf_event > *event, > return n * sizeof(u64); > } > > +static bool is_event_hup(struct perf_event *event) > +{ > + bool no_children; > + > + if (event->state != PERF_EVENT_STATE_EXIT) > + return false; > + > + mutex_lock(>child_mutex); > + no_children = list_empty(>child_list); > + mutex_unlock(>child_mutex); > + return no_children; > +} > + > /* > * Read the performance event - simple non blocking version for now > */ > @@ -3582,8 +3595,7 @@ perf_read_hw(struct perf_event *event, char __user > *buf, size_t count) >* error state (i.e. because it was pinned but it couldn't be >* scheduled on to the CPU at some point). >*/ > - if ((event->state == PERF_EVENT_STATE_ERROR) || > - (event->state == PERF_EVENT_STATE_EXIT)) > + if ((event->state == PERF_EVENT_STATE_ERROR) || (is_event_hup(event))) > return 0; Do we want this? It seems like a fairly sensible thing to start a counter and wait for the thing to die, only to then read the total count. But with this on we get 0s. I suppose Stephane's email got to you after you did this and we should be dropping this thing entirely? > if (count < event->read_size) > @@ -3614,7 +3626,7 @@ static unsigned int perf_poll(struct file *file, > poll_table *wait) > > poll_wait(file, >waitq, wait); > > - if (event->state == PERF_EVENT_STATE_EXIT) > + if (is_event_hup(event)) > return events; > > /* pgpBkCjUsbHZz.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 01:39:58PM +0200, Peter Zijlstra wrote: > On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: > > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: > > > > > > The thing is; I don't understand those reasons. That commit log doesn't > > > > explain. > > > > > > Ah wait, I finally see. I think we want to fix that exit path, not > > > disallow the cloning. > > > > > > The thing is, by not allowing this optimization simple things like eg. > > > pipe-test say very expensive. > > > > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events > > with exited task") that introduces the problem. Before that things would > > work correctly afaict. > > > > The exit would remove from the context but leave the event in existence. > > Both the fd and the inherited events would have references to it, only > > once those are gone do we destroy the actual event. > > I have another 'problem' with 179033b3e064. What if you 'want' to > continue monitoring after the initial task died? Eg. if you want to > monitor crap that unconditionally daemonizes. > > So at this point I would suggest we revert both 179033b3e064 and > 1f9a7268c67f so that the context switch optimization works again for > simple 2 task things (pipe-test). > > If we re-introduce 179033b3e064 (which I think makes sense) we need to > make sure it works with the context switch optimization (we could add > perf_event::task I think, it would clean up a few other things iirc) and > we'd have to make it conditional so we can still monitor daemons. > > Or am I totally missing things again? as I said in previous email I dont think the 179033b3e064 introduced the issue and I think we should keep 1f9a7268c67f to have correct numbers ;-) I'll think about the fix to keep non-cloned context in optimized switch jirka -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 01:39:58PM +0200, Peter Zijlstra wrote: > On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: > > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: > > > > > > The thing is; I don't understand those reasons. That commit log doesn't > > > > explain. > > > > > > Ah wait, I finally see. I think we want to fix that exit path, not > > > disallow the cloning. > > > > > > The thing is, by not allowing this optimization simple things like eg. > > > pipe-test say very expensive. > > > > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events > > with exited task") that introduces the problem. Before that things would > > work correctly afaict. > > > > The exit would remove from the context but leave the event in existence. > > Both the fd and the inherited events would have references to it, only > > once those are gone do we destroy the actual event. > > I have another 'problem' with 179033b3e064. What if you 'want' to > continue monitoring after the initial task died? Eg. if you want to > monitor crap that unconditionally daemonizes. right.. did not think of that.. need to check more, but seems like just the check for children should be enough jirka --- diff --git a/kernel/events/core.c b/kernel/events/core.c index bf482ccbdbe1..341d0b47ca14 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3568,6 +3568,19 @@ static int perf_event_read_one(struct perf_event *event, return n * sizeof(u64); } +static bool is_event_hup(struct perf_event *event) +{ + bool no_children; + + if (event->state != PERF_EVENT_STATE_EXIT) + return false; + + mutex_lock(>child_mutex); + no_children = list_empty(>child_list); + mutex_unlock(>child_mutex); + return no_children; +} + /* * Read the performance event - simple non blocking version for now */ @@ -3582,8 +3595,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) * error state (i.e. because it was pinned but it couldn't be * scheduled on to the CPU at some point). */ - if ((event->state == PERF_EVENT_STATE_ERROR) || - (event->state == PERF_EVENT_STATE_EXIT)) + if ((event->state == PERF_EVENT_STATE_ERROR) || (is_event_hup(event))) return 0; if (count < event->read_size) @@ -3614,7 +3626,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) poll_wait(file, >waitq, wait); - if (event->state == PERF_EVENT_STATE_EXIT) + if (is_event_hup(event)) return events; /* -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: > > > > The thing is; I don't understand those reasons. That commit log doesn't > > > explain. > > > > Ah wait, I finally see. I think we want to fix that exit path, not > > disallow the cloning. > > > > The thing is, by not allowing this optimization simple things like eg. > > pipe-test say very expensive. > > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events > with exited task") that introduces the problem. Before that things would > work correctly afaict. hum, I dont think so.. because the perf_remove_from_context set event to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events disabled the PERF_EVENT_STATE_EXIT should be just notification for poll to return HUP > The exit would remove from the context but leave the event in existence. > Both the fd and the inherited events would have references to it, only > once those are gone do we destroy the actual event. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: > > > > The thing is; I don't understand those reasons. That commit log doesn't > > > explain. > > > > Ah wait, I finally see. I think we want to fix that exit path, not > > disallow the cloning. > > > > The thing is, by not allowing this optimization simple things like eg. > > pipe-test say very expensive. > > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events > with exited task") that introduces the problem. Before that things would > work correctly afaict. > > The exit would remove from the context but leave the event in existence. > Both the fd and the inherited events would have references to it, only > once those are gone do we destroy the actual event. I have another 'problem' with 179033b3e064. What if you 'want' to continue monitoring after the initial task died? Eg. if you want to monitor crap that unconditionally daemonizes. So at this point I would suggest we revert both 179033b3e064 and 1f9a7268c67f so that the context switch optimization works again for simple 2 task things (pipe-test). If we re-introduce 179033b3e064 (which I think makes sense) we need to make sure it works with the context switch optimization (we could add perf_event::task I think, it would clean up a few other things iirc) and we'd have to make it conditional so we can still monitor daemons. Or am I totally missing things again? pgp2O9F2dr1nX.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: > > The thing is; I don't understand those reasons. That commit log doesn't > > explain. > > Ah wait, I finally see. I think we want to fix that exit path, not > disallow the cloning. > > The thing is, by not allowing this optimization simple things like eg. > pipe-test say very expensive. So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events with exited task") that introduces the problem. Before that things would work correctly afaict. The exit would remove from the context but leave the event in existence. Both the fd and the inherited events would have references to it, only once those are gone do we destroy the actual event. pgpQxWGnfhLc1.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 11:45:48AM +0200, Peter Zijlstra wrote: > On Mon, Sep 08, 2014 at 11:43:48AM +0200, Jiri Olsa wrote: > > On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote: > > > On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote: > > > > As described in commit: > > > > 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events > > > > we dont allow optimized switch for non cloned contexts. > > > > > > > > There's no need now to check if one of the context is parent > > > > of the other in context_equiv function. > > > > > > > > > > OK, so talk to me about that patch; it looks like you're slowly > > > reverting: 5a3126d4fe7c ("perf: Fix the perf context switch > > > optimization"). > > > > > > So what exactly is the problem? I'm still jet-lagged and not seeing it. > > > > the problem is, that we cannot allow non-cloned context > > to be part of the optimized switch for reasons explained > > in commit 1f9a7268c67f > > The thing is; I don't understand those reasons. That commit log doesn't > explain. Ah wait, I finally see. I think we want to fix that exit path, not disallow the cloning. The thing is, by not allowing this optimization simple things like eg. pipe-test say very expensive. pgpwhKka6o5ck.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 11:43:48AM +0200, Jiri Olsa wrote: > On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote: > > On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote: > > > As described in commit: > > > 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events > > > we dont allow optimized switch for non cloned contexts. > > > > > > There's no need now to check if one of the context is parent > > > of the other in context_equiv function. > > > > > > > OK, so talk to me about that patch; it looks like you're slowly > > reverting: 5a3126d4fe7c ("perf: Fix the perf context switch > > optimization"). > > > > So what exactly is the problem? I'm still jet-lagged and not seeing it. > > the problem is, that we cannot allow non-cloned context > to be part of the optimized switch for reasons explained > in commit 1f9a7268c67f The thing is; I don't understand those reasons. That commit log doesn't explain. pgpkWKICXtVL4.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote: > On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote: > > As described in commit: > > 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events > > we dont allow optimized switch for non cloned contexts. > > > > There's no need now to check if one of the context is parent > > of the other in context_equiv function. > > > > OK, so talk to me about that patch; it looks like you're slowly > reverting: 5a3126d4fe7c ("perf: Fix the perf context switch > optimization"). > > So what exactly is the problem? I'm still jet-lagged and not seeing it. the problem is, that we cannot allow non-cloned context to be part of the optimized switch for reasons explained in commit 1f9a7268c67f I did not realize there was a just single commit introducing non-cloned context switch to revert ;-) attached I haven't tested it yet.. will do with my other changes thanks, jirka --- This reverts commit 5a3126d4fe7c311fe12f98fef0470f6cb582d1ef. As explained in commit: 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events we cannot allow optimized switch for non cloned contexts. It suppressed it by forcing the condition for parent contexts. But what we actually should do is to revert the full commit that introduced the optimized switch for non-cloned contexts. This way we get rid of unneeded (now) ctx->generation updates for non-cloned contexts. --- kernel/events/core.c | 64 +++- 1 file changed, 18 insertions(+), 46 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index d8cb4d21a346..424e6d3b07b7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -916,7 +916,6 @@ static void unclone_ctx(struct perf_event_context *ctx) put_ctx(ctx->parent_ctx); ctx->parent_ctx = NULL; } - ctx->generation++; } static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) @@ -1154,8 +1153,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; - - ctx->generation++; } /* @@ -1333,8 +1330,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) */ if (event->state > PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_OFF; - - ctx->generation++; } static void perf_group_detach(struct perf_event *event) @@ -2243,38 +2238,22 @@ static void ctx_sched_out(struct perf_event_context *ctx, } /* - * Test whether two contexts are equivalent, i.e. whether they have both been - * cloned from the same version of the same context. - * - * Equivalence is measured using a generation number in the context that is - * incremented on each modification to it; see unclone_ctx(), list_add_event() - * and list_del_event(). + * Test whether two contexts are equivalent, i.e. whether they + * have both been cloned from the same version of the same context + * and they both have the same number of enabled events. + * If the number of enabled events is the same, then the set + * of enabled events should be the same, because these are both + * inherited contexts, therefore we can't access individual events + * in them directly with an fd; we can only enable/disable all + * events via prctl, or enable/disable all events in a family + * via ioctl, which will have the same effect on both contexts. */ static int context_equiv(struct perf_event_context *ctx1, struct perf_event_context *ctx2) { - /* Pinning disables the swap optimization */ - if (ctx1->pin_count || ctx2->pin_count) - return 0; - - /* If ctx1 is the parent of ctx2 */ - if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen) - return 1; - - /* If ctx2 is the parent of ctx1 */ - if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation) - return 1; - - /* -* If ctx1 and ctx2 have the same parent; we flatten the parent -* hierarchy, see perf_event_init_context(). -*/ - if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx && - ctx1->parent_gen == ctx2->parent_gen) - return 1; - - /* Unmatched */ - return 0; + return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx + && ctx1->parent_gen == ctx2->parent_gen + && !ctx1->pin_count && !ctx2->pin_count; } static void __perf_event_sync_stat(struct perf_event *event, @@ -2354,7 +2333,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, { struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; struct perf_event_context *next_ctx; - struct perf_event_context *parent, *next_parent; + struct
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote: On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote: As described in commit: 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events we dont allow optimized switch for non cloned contexts. There's no need now to check if one of the context is parent of the other in context_equiv function. OK, so talk to me about that patch; it looks like you're slowly reverting: 5a3126d4fe7c (perf: Fix the perf context switch optimization). So what exactly is the problem? I'm still jet-lagged and not seeing it. the problem is, that we cannot allow non-cloned context to be part of the optimized switch for reasons explained in commit 1f9a7268c67f I did not realize there was a just single commit introducing non-cloned context switch to revert ;-) attached I haven't tested it yet.. will do with my other changes thanks, jirka --- This reverts commit 5a3126d4fe7c311fe12f98fef0470f6cb582d1ef. As explained in commit: 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events we cannot allow optimized switch for non cloned contexts. It suppressed it by forcing the condition for parent contexts. But what we actually should do is to revert the full commit that introduced the optimized switch for non-cloned contexts. This way we get rid of unneeded (now) ctx-generation updates for non-cloned contexts. --- kernel/events/core.c | 64 +++- 1 file changed, 18 insertions(+), 46 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index d8cb4d21a346..424e6d3b07b7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -916,7 +916,6 @@ static void unclone_ctx(struct perf_event_context *ctx) put_ctx(ctx-parent_ctx); ctx-parent_ctx = NULL; } - ctx-generation++; } static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) @@ -1154,8 +1153,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) ctx-nr_events++; if (event-attr.inherit_stat) ctx-nr_stat++; - - ctx-generation++; } /* @@ -1333,8 +1330,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) */ if (event-state PERF_EVENT_STATE_OFF) event-state = PERF_EVENT_STATE_OFF; - - ctx-generation++; } static void perf_group_detach(struct perf_event *event) @@ -2243,38 +2238,22 @@ static void ctx_sched_out(struct perf_event_context *ctx, } /* - * Test whether two contexts are equivalent, i.e. whether they have both been - * cloned from the same version of the same context. - * - * Equivalence is measured using a generation number in the context that is - * incremented on each modification to it; see unclone_ctx(), list_add_event() - * and list_del_event(). + * Test whether two contexts are equivalent, i.e. whether they + * have both been cloned from the same version of the same context + * and they both have the same number of enabled events. + * If the number of enabled events is the same, then the set + * of enabled events should be the same, because these are both + * inherited contexts, therefore we can't access individual events + * in them directly with an fd; we can only enable/disable all + * events via prctl, or enable/disable all events in a family + * via ioctl, which will have the same effect on both contexts. */ static int context_equiv(struct perf_event_context *ctx1, struct perf_event_context *ctx2) { - /* Pinning disables the swap optimization */ - if (ctx1-pin_count || ctx2-pin_count) - return 0; - - /* If ctx1 is the parent of ctx2 */ - if (ctx1 == ctx2-parent_ctx ctx1-generation == ctx2-parent_gen) - return 1; - - /* If ctx2 is the parent of ctx1 */ - if (ctx1-parent_ctx == ctx2 ctx1-parent_gen == ctx2-generation) - return 1; - - /* -* If ctx1 and ctx2 have the same parent; we flatten the parent -* hierarchy, see perf_event_init_context(). -*/ - if (ctx1-parent_ctx ctx1-parent_ctx == ctx2-parent_ctx - ctx1-parent_gen == ctx2-parent_gen) - return 1; - - /* Unmatched */ - return 0; + return ctx1-parent_ctx ctx1-parent_ctx == ctx2-parent_ctx +ctx1-parent_gen == ctx2-parent_gen +!ctx1-pin_count !ctx2-pin_count; } static void __perf_event_sync_stat(struct perf_event *event, @@ -2354,7 +2333,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, { struct perf_event_context *ctx = task-perf_event_ctxp[ctxn]; struct perf_event_context *next_ctx; - struct perf_event_context *parent, *next_parent; + struct perf_event_context *parent; struct perf_cpu_context *cpuctx; int
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 11:43:48AM +0200, Jiri Olsa wrote: On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote: On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote: As described in commit: 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events we dont allow optimized switch for non cloned contexts. There's no need now to check if one of the context is parent of the other in context_equiv function. OK, so talk to me about that patch; it looks like you're slowly reverting: 5a3126d4fe7c (perf: Fix the perf context switch optimization). So what exactly is the problem? I'm still jet-lagged and not seeing it. the problem is, that we cannot allow non-cloned context to be part of the optimized switch for reasons explained in commit 1f9a7268c67f The thing is; I don't understand those reasons. That commit log doesn't explain. pgpkWKICXtVL4.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 11:45:48AM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 11:43:48AM +0200, Jiri Olsa wrote: On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote: On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote: As described in commit: 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events we dont allow optimized switch for non cloned contexts. There's no need now to check if one of the context is parent of the other in context_equiv function. OK, so talk to me about that patch; it looks like you're slowly reverting: 5a3126d4fe7c (perf: Fix the perf context switch optimization). So what exactly is the problem? I'm still jet-lagged and not seeing it. the problem is, that we cannot allow non-cloned context to be part of the optimized switch for reasons explained in commit 1f9a7268c67f The thing is; I don't understand those reasons. That commit log doesn't explain. Ah wait, I finally see. I think we want to fix that exit path, not disallow the cloning. The thing is, by not allowing this optimization simple things like eg. pipe-test say very expensive. pgpwhKka6o5ck.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: The thing is; I don't understand those reasons. That commit log doesn't explain. Ah wait, I finally see. I think we want to fix that exit path, not disallow the cloning. The thing is, by not allowing this optimization simple things like eg. pipe-test say very expensive. So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events with exited task) that introduces the problem. Before that things would work correctly afaict. The exit would remove from the context but leave the event in existence. Both the fd and the inherited events would have references to it, only once those are gone do we destroy the actual event. pgpQxWGnfhLc1.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: The thing is; I don't understand those reasons. That commit log doesn't explain. Ah wait, I finally see. I think we want to fix that exit path, not disallow the cloning. The thing is, by not allowing this optimization simple things like eg. pipe-test say very expensive. So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events with exited task) that introduces the problem. Before that things would work correctly afaict. The exit would remove from the context but leave the event in existence. Both the fd and the inherited events would have references to it, only once those are gone do we destroy the actual event. I have another 'problem' with 179033b3e064. What if you 'want' to continue monitoring after the initial task died? Eg. if you want to monitor crap that unconditionally daemonizes. So at this point I would suggest we revert both 179033b3e064 and 1f9a7268c67f so that the context switch optimization works again for simple 2 task things (pipe-test). If we re-introduce 179033b3e064 (which I think makes sense) we need to make sure it works with the context switch optimization (we could add perf_event::task I think, it would clean up a few other things iirc) and we'd have to make it conditional so we can still monitor daemons. Or am I totally missing things again? pgp2O9F2dr1nX.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: The thing is; I don't understand those reasons. That commit log doesn't explain. Ah wait, I finally see. I think we want to fix that exit path, not disallow the cloning. The thing is, by not allowing this optimization simple things like eg. pipe-test say very expensive. So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events with exited task) that introduces the problem. Before that things would work correctly afaict. hum, I dont think so.. because the perf_remove_from_context set event to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events disabled the PERF_EVENT_STATE_EXIT should be just notification for poll to return HUP The exit would remove from the context but leave the event in existence. Both the fd and the inherited events would have references to it, only once those are gone do we destroy the actual event. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 01:39:58PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: The thing is; I don't understand those reasons. That commit log doesn't explain. Ah wait, I finally see. I think we want to fix that exit path, not disallow the cloning. The thing is, by not allowing this optimization simple things like eg. pipe-test say very expensive. So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events with exited task) that introduces the problem. Before that things would work correctly afaict. The exit would remove from the context but leave the event in existence. Both the fd and the inherited events would have references to it, only once those are gone do we destroy the actual event. I have another 'problem' with 179033b3e064. What if you 'want' to continue monitoring after the initial task died? Eg. if you want to monitor crap that unconditionally daemonizes. right.. did not think of that.. need to check more, but seems like just the check for children should be enough jirka --- diff --git a/kernel/events/core.c b/kernel/events/core.c index bf482ccbdbe1..341d0b47ca14 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3568,6 +3568,19 @@ static int perf_event_read_one(struct perf_event *event, return n * sizeof(u64); } +static bool is_event_hup(struct perf_event *event) +{ + bool no_children; + + if (event-state != PERF_EVENT_STATE_EXIT) + return false; + + mutex_lock(event-child_mutex); + no_children = list_empty(event-child_list); + mutex_unlock(event-child_mutex); + return no_children; +} + /* * Read the performance event - simple non blocking version for now */ @@ -3582,8 +3595,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) * error state (i.e. because it was pinned but it couldn't be * scheduled on to the CPU at some point). */ - if ((event-state == PERF_EVENT_STATE_ERROR) || - (event-state == PERF_EVENT_STATE_EXIT)) + if ((event-state == PERF_EVENT_STATE_ERROR) || (is_event_hup(event))) return 0; if (count event-read_size) @@ -3614,7 +3626,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) poll_wait(file, event-waitq, wait); - if (event-state == PERF_EVENT_STATE_EXIT) + if (is_event_hup(event)) return events; /* -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 01:39:58PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: The thing is; I don't understand those reasons. That commit log doesn't explain. Ah wait, I finally see. I think we want to fix that exit path, not disallow the cloning. The thing is, by not allowing this optimization simple things like eg. pipe-test say very expensive. So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events with exited task) that introduces the problem. Before that things would work correctly afaict. The exit would remove from the context but leave the event in existence. Both the fd and the inherited events would have references to it, only once those are gone do we destroy the actual event. I have another 'problem' with 179033b3e064. What if you 'want' to continue monitoring after the initial task died? Eg. if you want to monitor crap that unconditionally daemonizes. So at this point I would suggest we revert both 179033b3e064 and 1f9a7268c67f so that the context switch optimization works again for simple 2 task things (pipe-test). If we re-introduce 179033b3e064 (which I think makes sense) we need to make sure it works with the context switch optimization (we could add perf_event::task I think, it would clean up a few other things iirc) and we'd have to make it conditional so we can still monitor daemons. Or am I totally missing things again? as I said in previous email I dont think the 179033b3e064 introduced the issue and I think we should keep 1f9a7268c67f to have correct numbers ;-) I'll think about the fix to keep non-cloned context in optimized switch jirka -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 02:19:52PM +0200, Jiri Olsa wrote: I have another 'problem' with 179033b3e064. What if you 'want' to continue monitoring after the initial task died? Eg. if you want to monitor crap that unconditionally daemonizes. right.. did not think of that.. need to check more, but seems like just the check for children should be enough Indeed, that should work. --- diff --git a/kernel/events/core.c b/kernel/events/core.c index bf482ccbdbe1..341d0b47ca14 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3568,6 +3568,19 @@ static int perf_event_read_one(struct perf_event *event, return n * sizeof(u64); } +static bool is_event_hup(struct perf_event *event) +{ + bool no_children; + + if (event-state != PERF_EVENT_STATE_EXIT) + return false; + + mutex_lock(event-child_mutex); + no_children = list_empty(event-child_list); + mutex_unlock(event-child_mutex); + return no_children; +} + /* * Read the performance event - simple non blocking version for now */ @@ -3582,8 +3595,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) * error state (i.e. because it was pinned but it couldn't be * scheduled on to the CPU at some point). */ - if ((event-state == PERF_EVENT_STATE_ERROR) || - (event-state == PERF_EVENT_STATE_EXIT)) + if ((event-state == PERF_EVENT_STATE_ERROR) || (is_event_hup(event))) return 0; Do we want this? It seems like a fairly sensible thing to start a counter and wait for the thing to die, only to then read the total count. But with this on we get 0s. I suppose Stephane's email got to you after you did this and we should be dropping this thing entirely? if (count event-read_size) @@ -3614,7 +3626,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) poll_wait(file, event-waitq, wait); - if (event-state == PERF_EVENT_STATE_EXIT) + if (is_event_hup(event)) return events; /* pgpBkCjUsbHZz.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote: On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: The thing is; I don't understand those reasons. That commit log doesn't explain. Ah wait, I finally see. I think we want to fix that exit path, not disallow the cloning. The thing is, by not allowing this optimization simple things like eg. pipe-test say very expensive. So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events with exited task) that introduces the problem. Before that things would work correctly afaict. hum, I dont think so.. because the perf_remove_from_context set event to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events disabled Urgh, see I knew I was missing something. Can't we fix that? Lemme check to see what relies on this. pgpPkoqTrGmCQ.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 03:34:28PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote: On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: The thing is; I don't understand those reasons. That commit log doesn't explain. Ah wait, I finally see. I think we want to fix that exit path, not disallow the cloning. The thing is, by not allowing this optimization simple things like eg. pipe-test say very expensive. So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events with exited task) that introduces the problem. Before that things would work correctly afaict. hum, I dont think so.. because the perf_remove_from_context set event to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events disabled Urgh, see I knew I was missing something. Can't we fix that? Lemme check to see what relies on this. 2e2af50b1fab (perf_events: Disable events when we detach them) Seems to be about it. And I think we should solve that differently, but the best I can come up with ties into the event-ctx mess we have in that other thread. The thing is, IOC_ENABLE/DISABLE and read() and such should act (sanely and) independent from the attached state. Its just that the whole event-ctx migration mess is making this somewhat hard atm. So things like perf_event_read() should not only check ctx-is_active but also worry about event-attach_state PERF_ATTACH_CONTEXT. Now the biggest problem is that we cannot tell if its a temporary state (move_group / migrate_context) or permanent (exit)... Urgh pgpp92w5dLyFX.pgp Description: PGP signature
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Sep 08, 2014 at 05:13:05PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 03:34:28PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote: On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote: On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote: The thing is; I don't understand those reasons. That commit log doesn't explain. Ah wait, I finally see. I think we want to fix that exit path, not disallow the cloning. The thing is, by not allowing this optimization simple things like eg. pipe-test say very expensive. So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events with exited task) that introduces the problem. Before that things would work correctly afaict. hum, I dont think so.. because the perf_remove_from_context set event to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events disabled Urgh, see I knew I was missing something. Can't we fix that? Lemme check to see what relies on this. 2e2af50b1fab (perf_events: Disable events when we detach them) Seems to be about it. And I think we should solve that differently, but the best I can come up with ties into the event-ctx mess we have in that other thread. The thing is, IOC_ENABLE/DISABLE and read() and such should act (sanely and) independent from the attached state. Its just that the whole event-ctx migration mess is making this somewhat hard atm. So things like perf_event_read() should not only check ctx-is_active but also worry about event-attach_state PERF_ATTACH_CONTEXT. Now the biggest problem is that we cannot tell if its a temporary state (move_group / migrate_context) or permanent (exit)... Urgh I just noticed that we initialize the child state with base parent state not the real (immediate) parent.. which is what we want IMO I wonder attached patch could fix the issue mentioned in: 1f9a726 perf: Do not allow optimized switch for non-cloned events now I need to recall what I used to test this ;-) jirka --- diff --git a/kernel/events/core.c b/kernel/events/core.c index e4d6924..561a4ea 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7794,6 +7794,7 @@ inherit_event(struct perf_event *parent_event, struct perf_event *group_leader, struct perf_event_context *child_ctx) { + enum perf_event_active_state parent_state = parent_event-state; struct perf_event *child_event; unsigned long flags; @@ -7827,7 +7828,7 @@ inherit_event(struct perf_event *parent_event, * not its attr.disabled bit. We hold the parent's mutex, * so we won't race with perf_event_{en, dis}able_family. */ - if (parent_event-state = PERF_EVENT_STATE_INACTIVE) + if (parent_state = PERF_EVENT_STATE_INACTIVE) child_event-state = PERF_EVENT_STATE_INACTIVE; else child_event-state = PERF_EVENT_STATE_OFF; -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote: > As described in commit: > 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events > we dont allow optimized switch for non cloned contexts. > > There's no need now to check if one of the context is parent > of the other in context_equiv function. > OK, so talk to me about that patch; it looks like you're slowly reverting: 5a3126d4fe7c ("perf: Fix the perf context switch optimization"). So what exactly is the problem? I'm still jet-lagged and not seeing it. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv
On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote: As described in commit: 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events we dont allow optimized switch for non cloned contexts. There's no need now to check if one of the context is parent of the other in context_equiv function. OK, so talk to me about that patch; it looks like you're slowly reverting: 5a3126d4fe7c (perf: Fix the perf context switch optimization). So what exactly is the problem? I'm still jet-lagged and not seeing it. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/9] perf: Remove redundant parent context check from context_equiv
As described in commit: 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events we dont allow optimized switch for non cloned contexts. There's no need now to check if one of the context is parent of the other in context_equiv function. Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jen-Cheng(Tommy) Huang Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Signed-off-by: Jiri Olsa --- kernel/events/core.c | 8 1 file changed, 8 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index d8cb4d21a346..4ad4ba2bc106 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2257,14 +2257,6 @@ static int context_equiv(struct perf_event_context *ctx1, if (ctx1->pin_count || ctx2->pin_count) return 0; - /* If ctx1 is the parent of ctx2 */ - if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen) - return 1; - - /* If ctx2 is the parent of ctx1 */ - if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation) - return 1; - /* * If ctx1 and ctx2 have the same parent; we flatten the parent * hierarchy, see perf_event_init_context(). -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/9] perf: Remove redundant parent context check from context_equiv
As described in commit: 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events we dont allow optimized switch for non cloned contexts. There's no need now to check if one of the context is parent of the other in context_equiv function. Cc: Andi Kleen a...@firstfloor.org Cc: Arnaldo Carvalho de Melo a...@kernel.org Cc: Corey Ashford cjash...@linux.vnet.ibm.com Cc: David Ahern dsah...@gmail.com Cc: Frederic Weisbecker fweis...@gmail.com Cc: Ingo Molnar mi...@kernel.org Cc: Jen-Cheng(Tommy) Huang tomm...@gatech.edu Cc: Namhyung Kim namhy...@kernel.org Cc: Paul Mackerras pau...@samba.org Cc: Peter Zijlstra a.p.zijls...@chello.nl Cc: Stephane Eranian eran...@google.com Signed-off-by: Jiri Olsa jo...@kernel.org --- kernel/events/core.c | 8 1 file changed, 8 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index d8cb4d21a346..4ad4ba2bc106 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2257,14 +2257,6 @@ static int context_equiv(struct perf_event_context *ctx1, if (ctx1-pin_count || ctx2-pin_count) return 0; - /* If ctx1 is the parent of ctx2 */ - if (ctx1 == ctx2-parent_ctx ctx1-generation == ctx2-parent_gen) - return 1; - - /* If ctx2 is the parent of ctx1 */ - if (ctx1-parent_ctx == ctx2 ctx1-parent_gen == ctx2-generation) - return 1; - /* * If ctx1 and ctx2 have the same parent; we flatten the parent * hierarchy, see perf_event_init_context(). -- 1.8.3.1 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/