Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-10 Thread Jiri Olsa
On Wed, Sep 10, 2014 at 03:57:09PM +0200, Peter Zijlstra wrote:
> On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote:
> > 
> > now I need to recall what I used to test this ;-)
> > 
> 
> Hint, if you'd mentioned that in the changelog :-)

will try much better this time.. perf test attached ;-)
seems like the child state fix is a good one

jirka


---
>From dc1068c627b332b2a5feb4deedfd96db8e056b5f Mon Sep 17 00:00:00 2001
From: Jiri Olsa 
Date: Wed, 10 Sep 2014 16:23:51 +0200
Subject: [PATCH] perf tests: Add test for optimized sched switch bug

---
 tools/perf/Makefile.perf|1 +
 tools/perf/tests/builtin-test.c |4 +
 tools/perf/tests/optimized_switch.c |  169 +++
 tools/perf/tests/tests.h|1 +
 4 files changed, 175 insertions(+), 0 deletions(-)
 create mode 100644 tools/perf/tests/optimized_switch.c

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index da8fc07..75603ac 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -427,6 +427,7 @@ LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o
 LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o
 LIB_OBJS += $(OUTPUT)tests/switch-tracking.o
 LIB_OBJS += $(OUTPUT)tests/xyarray.o
+LIB_OBJS += $(OUTPUT)tests/optimized_switch.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
 BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 5c90326..e092d17 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -162,6 +162,10 @@ static struct test {
.func = test__xyarray,
},
{
+   .desc = "Test optimized switch",
+   .func = test__optimized_switch,
+   },
+   {
.func = NULL,
},
 };
diff --git a/tools/perf/tests/optimized_switch.c 
b/tools/perf/tests/optimized_switch.c
new file mode 100644
index 000..68f8705
--- /dev/null
+++ b/tools/perf/tests/optimized_switch.c
@@ -0,0 +1,169 @@
+#include 
+#include 
+#include 
+#include 
+#include "tests.h"
+#include "debug.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "thread_map.h"
+
+#define GROUPS 10
+#define WORKERS10
+#define BYTES  100
+
+struct worker {
+   union {
+   int pipefd[2];
+   struct {
+   int read;
+   int write;
+   } fd;
+   };
+};
+
+#define pr_errsys(s) \
+   pr_err(s " failed: %s\n", strerror(errno))
+
+static void worker(int fd)
+{
+   unsigned char c = 0;
+   int cnt = BYTES;
+   ssize_t r;
+
+   while (cnt--) {
+   r = read(fd, , sizeof(c));
+   if (r != 1) {
+   pr_errsys("read");
+   exit(-1);
+   }
+   }
+
+   prctl(0, 0, 0, 0, 0);
+   exit(0);
+}
+
+static void write_round(struct worker *workers)
+{
+   unsigned char c = 0;
+   ssize_t r;
+   int i, j;
+
+   for (j = 0; j < BYTES; j++) {
+   for (i = 0; i < WORKERS; i++) {
+   struct worker *w = [i];
+
+   r = write(w->fd.write, , sizeof(c));
+   if (r != 1) {
+   pr_errsys("write");
+   exit(-1);
+   }
+   }
+   }
+}
+
+static int group(void)
+{
+   struct worker workers[WORKERS];
+   int i;
+
+   for (i = 0; i < WORKERS; i++) {
+   struct worker *w = [i];
+   int pid;
+
+   if (pipe(w->pipefd)) {
+   pr_errsys("pipe");
+   return -1;
+   }
+
+   pid = fork();
+   if (pid == -1) {
+   pr_errsys("fork");
+   return -1;
+   } else if (!pid) {
+   worker(w->fd.read);
+   }
+   }
+
+   write_round(workers);
+
+   for (i = 0; i < WORKERS; i++)
+   wait(NULL);
+
+   return 0;
+}
+
+static int child(void)
+{
+   int i;
+
+   for (i = 0; i < GROUPS; i++)
+   TEST_ASSERT_VAL("group failed", !group());
+
+   return 0;
+}
+
+int test__optimized_switch(void)
+{
+   ssize_t r __maybe_unused;
+   unsigned char c = 0;
+   int ready[2], pid;
+   struct thread_map *threads;
+   struct perf_evsel *evsel;
+   u64 total;
+
+   if (pipe(ready)) {
+   pr_errsys("pipe");
+   return -1;
+   }
+
+   pid = fork();
+   if (pid == -1) {
+   pr_errsys("fork");
+   return -1;
+   }
+
+   if (!pid) {
+   r = read(ready[0], , sizeof(c));
+   if (r != 1) {
+   pr_errsys("read");
+   exit(-1);
+   }
+
+   exit(child());
+   }
+
+   threads = thread_map__new(-1, pid, 

Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-10 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote:
> 
> now I need to recall what I used to test this ;-)
> 

Hint, if you'd mentioned that in the changelog :-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-10 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote:
 
 now I need to recall what I used to test this ;-)
 

Hint, if you'd mentioned that in the changelog :-)
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-10 Thread Jiri Olsa
On Wed, Sep 10, 2014 at 03:57:09PM +0200, Peter Zijlstra wrote:
 On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote:
  
  now I need to recall what I used to test this ;-)
  
 
 Hint, if you'd mentioned that in the changelog :-)

will try much better this time.. perf test attached ;-)
seems like the child state fix is a good one

jirka


---
From dc1068c627b332b2a5feb4deedfd96db8e056b5f Mon Sep 17 00:00:00 2001
From: Jiri Olsa jo...@redhat.com
Date: Wed, 10 Sep 2014 16:23:51 +0200
Subject: [PATCH] perf tests: Add test for optimized sched switch bug

---
 tools/perf/Makefile.perf|1 +
 tools/perf/tests/builtin-test.c |4 +
 tools/perf/tests/optimized_switch.c |  169 +++
 tools/perf/tests/tests.h|1 +
 4 files changed, 175 insertions(+), 0 deletions(-)
 create mode 100644 tools/perf/tests/optimized_switch.c

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index da8fc07..75603ac 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -427,6 +427,7 @@ LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o
 LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o
 LIB_OBJS += $(OUTPUT)tests/switch-tracking.o
 LIB_OBJS += $(OUTPUT)tests/xyarray.o
+LIB_OBJS += $(OUTPUT)tests/optimized_switch.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
 BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 5c90326..e092d17 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -162,6 +162,10 @@ static struct test {
.func = test__xyarray,
},
{
+   .desc = Test optimized switch,
+   .func = test__optimized_switch,
+   },
+   {
.func = NULL,
},
 };
diff --git a/tools/perf/tests/optimized_switch.c 
b/tools/perf/tests/optimized_switch.c
new file mode 100644
index 000..68f8705
--- /dev/null
+++ b/tools/perf/tests/optimized_switch.c
@@ -0,0 +1,169 @@
+#include unistd.h
+#include stdlib.h
+#include sys/prctl.h
+#include linux/compiler.h
+#include tests.h
+#include debug.h
+#include evsel.h
+#include evlist.h
+#include thread_map.h
+
+#define GROUPS 10
+#define WORKERS10
+#define BYTES  100
+
+struct worker {
+   union {
+   int pipefd[2];
+   struct {
+   int read;
+   int write;
+   } fd;
+   };
+};
+
+#define pr_errsys(s) \
+   pr_err(s  failed: %s\n, strerror(errno))
+
+static void worker(int fd)
+{
+   unsigned char c = 0;
+   int cnt = BYTES;
+   ssize_t r;
+
+   while (cnt--) {
+   r = read(fd, c, sizeof(c));
+   if (r != 1) {
+   pr_errsys(read);
+   exit(-1);
+   }
+   }
+
+   prctl(0, 0, 0, 0, 0);
+   exit(0);
+}
+
+static void write_round(struct worker *workers)
+{
+   unsigned char c = 0;
+   ssize_t r;
+   int i, j;
+
+   for (j = 0; j  BYTES; j++) {
+   for (i = 0; i  WORKERS; i++) {
+   struct worker *w = workers[i];
+
+   r = write(w-fd.write, c, sizeof(c));
+   if (r != 1) {
+   pr_errsys(write);
+   exit(-1);
+   }
+   }
+   }
+}
+
+static int group(void)
+{
+   struct worker workers[WORKERS];
+   int i;
+
+   for (i = 0; i  WORKERS; i++) {
+   struct worker *w = workers[i];
+   int pid;
+
+   if (pipe(w-pipefd)) {
+   pr_errsys(pipe);
+   return -1;
+   }
+
+   pid = fork();
+   if (pid == -1) {
+   pr_errsys(fork);
+   return -1;
+   } else if (!pid) {
+   worker(w-fd.read);
+   }
+   }
+
+   write_round(workers);
+
+   for (i = 0; i  WORKERS; i++)
+   wait(NULL);
+
+   return 0;
+}
+
+static int child(void)
+{
+   int i;
+
+   for (i = 0; i  GROUPS; i++)
+   TEST_ASSERT_VAL(group failed, !group());
+
+   return 0;
+}
+
+int test__optimized_switch(void)
+{
+   ssize_t r __maybe_unused;
+   unsigned char c = 0;
+   int ready[2], pid;
+   struct thread_map *threads;
+   struct perf_evsel *evsel;
+   u64 total;
+
+   if (pipe(ready)) {
+   pr_errsys(pipe);
+   return -1;
+   }
+
+   pid = fork();
+   if (pid == -1) {
+   pr_errsys(fork);
+   return -1;
+   }
+
+   if (!pid) {
+   r = read(ready[0], c, sizeof(c));
+   if (r != 1) {
+   pr_errsys(read);
+   exit(-1);
+   }
+
+   exit(child());
+   }
+
+   threads = 

Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-09 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote:
> I just noticed that we initialize the child state with base parent
> state not the real (immediate) parent.. which is what we want IMO
> 
> I wonder attached patch could fix the issue mentioned in:
>   1f9a726 perf: Do not allow optimized switch for non-cloned events
> 
> now I need to recall what I used to test this ;-)

Ah, very nice! Yes this might just do it.

> ---
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index e4d6924..561a4ea 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7794,6 +7794,7 @@ inherit_event(struct perf_event *parent_event,
> struct perf_event *group_leader,
> struct perf_event_context *child_ctx)
>  {
> + enum perf_event_active_state parent_state = parent_event->state;
>   struct perf_event *child_event;
>   unsigned long flags;
>  
> @@ -7827,7 +7828,7 @@ inherit_event(struct perf_event *parent_event,
>* not its attr.disabled bit.  We hold the parent's mutex,
>* so we won't race with perf_event_{en, dis}able_family.
>*/
> - if (parent_event->state >= PERF_EVENT_STATE_INACTIVE)
> + if (parent_state >= PERF_EVENT_STATE_INACTIVE)
>   child_event->state = PERF_EVENT_STATE_INACTIVE;
>   else
>   child_event->state = PERF_EVENT_STATE_OFF;


pgpyUo3adzwlP.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-09 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 06:45:24PM +0200, Jiri Olsa wrote:
 I just noticed that we initialize the child state with base parent
 state not the real (immediate) parent.. which is what we want IMO
 
 I wonder attached patch could fix the issue mentioned in:
   1f9a726 perf: Do not allow optimized switch for non-cloned events
 
 now I need to recall what I used to test this ;-)

Ah, very nice! Yes this might just do it.

 ---
 diff --git a/kernel/events/core.c b/kernel/events/core.c
 index e4d6924..561a4ea 100644
 --- a/kernel/events/core.c
 +++ b/kernel/events/core.c
 @@ -7794,6 +7794,7 @@ inherit_event(struct perf_event *parent_event,
 struct perf_event *group_leader,
 struct perf_event_context *child_ctx)
  {
 + enum perf_event_active_state parent_state = parent_event-state;
   struct perf_event *child_event;
   unsigned long flags;
  
 @@ -7827,7 +7828,7 @@ inherit_event(struct perf_event *parent_event,
* not its attr.disabled bit.  We hold the parent's mutex,
* so we won't race with perf_event_{en, dis}able_family.
*/
 - if (parent_event-state = PERF_EVENT_STATE_INACTIVE)
 + if (parent_state = PERF_EVENT_STATE_INACTIVE)
   child_event-state = PERF_EVENT_STATE_INACTIVE;
   else
   child_event-state = PERF_EVENT_STATE_OFF;


pgpyUo3adzwlP.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Jiri Olsa
On Mon, Sep 08, 2014 at 05:13:05PM +0200, Peter Zijlstra wrote:
> On Mon, Sep 08, 2014 at 03:34:28PM +0200, Peter Zijlstra wrote:
> > On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote:
> > > On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
> > > > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
> > > > 
> > > > > > The thing is; I don't understand those reasons. That commit log 
> > > > > > doesn't
> > > > > > explain.
> > > > > 
> > > > > Ah wait, I finally see. I think we want to fix that exit path, not
> > > > > disallow the cloning.
> > > > > 
> > > > > The thing is, by not allowing this optimization simple things like eg.
> > > > > pipe-test say very expensive.
> > > > 
> > > > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events
> > > > with exited task") that introduces the problem. Before that things would
> > > > work correctly afaict.
> > > 
> > > hum, I dont think so.. because the perf_remove_from_context set event
> > > to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events
> > > disabled
> > 
> > Urgh, see I knew I was missing something.
> > 
> > Can't we fix that? Lemme check to see what relies on this.
> 
> 2e2af50b1fab ("perf_events: Disable events when we detach them")
> 
> Seems to be about it. And I think we should solve that differently, but
> the best I can come up with ties into the event->ctx mess we have in
> that other thread.
> 
> The thing is, IOC_ENABLE/DISABLE and read() and such should act
> (sanely and) independent from the attached state.
> 
> Its just that the whole event->ctx migration mess is making this
> somewhat hard atm.
> 
> So things like perf_event_read() should not only check ctx->is_active
> but also worry about event->attach_state & PERF_ATTACH_CONTEXT.
> 
> Now the biggest problem is that we cannot tell if its a temporary state
> (move_group / migrate_context) or permanent (exit)... 
> 
> Urgh

I just noticed that we initialize the child state with base parent
state not the real (immediate) parent.. which is what we want IMO

I wonder attached patch could fix the issue mentioned in:
  1f9a726 perf: Do not allow optimized switch for non-cloned events

now I need to recall what I used to test this ;-)

jirka


---
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e4d6924..561a4ea 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7794,6 +7794,7 @@ inherit_event(struct perf_event *parent_event,
  struct perf_event *group_leader,
  struct perf_event_context *child_ctx)
 {
+   enum perf_event_active_state parent_state = parent_event->state;
struct perf_event *child_event;
unsigned long flags;
 
@@ -7827,7 +7828,7 @@ inherit_event(struct perf_event *parent_event,
 * not its attr.disabled bit.  We hold the parent's mutex,
 * so we won't race with perf_event_{en, dis}able_family.
 */
-   if (parent_event->state >= PERF_EVENT_STATE_INACTIVE)
+   if (parent_state >= PERF_EVENT_STATE_INACTIVE)
child_event->state = PERF_EVENT_STATE_INACTIVE;
else
child_event->state = PERF_EVENT_STATE_OFF;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 03:34:28PM +0200, Peter Zijlstra wrote:
> On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote:
> > On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
> > > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
> > > 
> > > > > The thing is; I don't understand those reasons. That commit log 
> > > > > doesn't
> > > > > explain.
> > > > 
> > > > Ah wait, I finally see. I think we want to fix that exit path, not
> > > > disallow the cloning.
> > > > 
> > > > The thing is, by not allowing this optimization simple things like eg.
> > > > pipe-test say very expensive.
> > > 
> > > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events
> > > with exited task") that introduces the problem. Before that things would
> > > work correctly afaict.
> > 
> > hum, I dont think so.. because the perf_remove_from_context set event
> > to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events
> > disabled
> 
> Urgh, see I knew I was missing something.
> 
> Can't we fix that? Lemme check to see what relies on this.

2e2af50b1fab ("perf_events: Disable events when we detach them")

Seems to be about it. And I think we should solve that differently, but
the best I can come up with ties into the event->ctx mess we have in
that other thread.

The thing is, IOC_ENABLE/DISABLE and read() and such should act
(sanely and) independent from the attached state.

Its just that the whole event->ctx migration mess is making this
somewhat hard atm.

So things like perf_event_read() should not only check ctx->is_active
but also worry about event->attach_state & PERF_ATTACH_CONTEXT.

Now the biggest problem is that we cannot tell if its a temporary state
(move_group / migrate_context) or permanent (exit)... 

Urgh


pgpp92w5dLyFX.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote:
> On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
> > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
> > 
> > > > The thing is; I don't understand those reasons. That commit log doesn't
> > > > explain.
> > > 
> > > Ah wait, I finally see. I think we want to fix that exit path, not
> > > disallow the cloning.
> > > 
> > > The thing is, by not allowing this optimization simple things like eg.
> > > pipe-test say very expensive.
> > 
> > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events
> > with exited task") that introduces the problem. Before that things would
> > work correctly afaict.
> 
> hum, I dont think so.. because the perf_remove_from_context set event
> to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events
> disabled

Urgh, see I knew I was missing something.

Can't we fix that? Lemme check to see what relies on this.


pgpPkoqTrGmCQ.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 02:19:52PM +0200, Jiri Olsa wrote:
> > I have another 'problem' with 179033b3e064. What if you 'want' to
> > continue monitoring after the initial task died? Eg. if you want to
> > monitor crap that unconditionally daemonizes.
> 
> right.. did not think of that.. need to check more, but
> seems like just the check for children should be enough
> 

Indeed, that should work.

> ---
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index bf482ccbdbe1..341d0b47ca14 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -3568,6 +3568,19 @@ static int perf_event_read_one(struct perf_event 
> *event,
>   return n * sizeof(u64);
>  }
>  
> +static bool is_event_hup(struct perf_event *event)
> +{
> + bool no_children;
> +
> + if (event->state != PERF_EVENT_STATE_EXIT)
> + return false;
> +
> + mutex_lock(>child_mutex);
> + no_children = list_empty(>child_list);
> + mutex_unlock(>child_mutex);
> + return no_children;
> +}
> +
>  /*
>   * Read the performance event - simple non blocking version for now
>   */
> @@ -3582,8 +3595,7 @@ perf_read_hw(struct perf_event *event, char __user 
> *buf, size_t count)
>* error state (i.e. because it was pinned but it couldn't be
>* scheduled on to the CPU at some point).
>*/
> - if ((event->state == PERF_EVENT_STATE_ERROR) ||
> - (event->state == PERF_EVENT_STATE_EXIT))
> + if ((event->state == PERF_EVENT_STATE_ERROR) || (is_event_hup(event)))
>   return 0;

Do we want this? It seems like a fairly sensible thing to start a
counter and wait for the thing to die, only to then read the total
count. But with this on we get 0s.

I suppose Stephane's email got to you after you did this and we should
be dropping this thing entirely?

>   if (count < event->read_size)
> @@ -3614,7 +3626,7 @@ static unsigned int perf_poll(struct file *file, 
> poll_table *wait)
>  
>   poll_wait(file, >waitq, wait);
>  
> - if (event->state == PERF_EVENT_STATE_EXIT)
> + if (is_event_hup(event))
>   return events;
>  
>   /*


pgpBkCjUsbHZz.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Jiri Olsa
On Mon, Sep 08, 2014 at 01:39:58PM +0200, Peter Zijlstra wrote:
> On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
> > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
> > 
> > > > The thing is; I don't understand those reasons. That commit log doesn't
> > > > explain.
> > > 
> > > Ah wait, I finally see. I think we want to fix that exit path, not
> > > disallow the cloning.
> > > 
> > > The thing is, by not allowing this optimization simple things like eg.
> > > pipe-test say very expensive.
> > 
> > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events
> > with exited task") that introduces the problem. Before that things would
> > work correctly afaict.
> > 
> > The exit would remove from the context but leave the event in existence.
> > Both the fd and the inherited events would have references to it, only
> > once those are gone do we destroy the actual event.
> 
> I have another 'problem' with 179033b3e064. What if you 'want' to
> continue monitoring after the initial task died? Eg. if you want to
> monitor crap that unconditionally daemonizes.
> 
> So at this point I would suggest we revert both 179033b3e064 and
> 1f9a7268c67f so that the context switch optimization works again for
> simple 2 task things (pipe-test).
> 
> If we re-introduce 179033b3e064 (which I think makes sense) we need to
> make sure it works with the context switch optimization (we could add
> perf_event::task I think, it would clean up a few other things iirc) and
> we'd have to make it conditional so we can still monitor daemons.
> 
> Or am I totally missing things again?

as I said in previous email I dont think the 179033b3e064 introduced
the issue and I think we should keep 1f9a7268c67f to have correct
numbers ;-)

I'll think about the fix to keep non-cloned context in optimized switch

jirka
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Jiri Olsa
On Mon, Sep 08, 2014 at 01:39:58PM +0200, Peter Zijlstra wrote:
> On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
> > On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
> > 
> > > > The thing is; I don't understand those reasons. That commit log doesn't
> > > > explain.
> > > 
> > > Ah wait, I finally see. I think we want to fix that exit path, not
> > > disallow the cloning.
> > > 
> > > The thing is, by not allowing this optimization simple things like eg.
> > > pipe-test say very expensive.
> > 
> > So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events
> > with exited task") that introduces the problem. Before that things would
> > work correctly afaict.
> > 
> > The exit would remove from the context but leave the event in existence.
> > Both the fd and the inherited events would have references to it, only
> > once those are gone do we destroy the actual event.
> 
> I have another 'problem' with 179033b3e064. What if you 'want' to
> continue monitoring after the initial task died? Eg. if you want to
> monitor crap that unconditionally daemonizes.

right.. did not think of that.. need to check more, but
seems like just the check for children should be enough

jirka


---
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bf482ccbdbe1..341d0b47ca14 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3568,6 +3568,19 @@ static int perf_event_read_one(struct perf_event *event,
return n * sizeof(u64);
 }
 
+static bool is_event_hup(struct perf_event *event)
+{
+   bool no_children;
+
+   if (event->state != PERF_EVENT_STATE_EXIT)
+   return false;
+
+   mutex_lock(>child_mutex);
+   no_children = list_empty(>child_list);
+   mutex_unlock(>child_mutex);
+   return no_children;
+}
+
 /*
  * Read the performance event - simple non blocking version for now
  */
@@ -3582,8 +3595,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, 
size_t count)
 * error state (i.e. because it was pinned but it couldn't be
 * scheduled on to the CPU at some point).
 */
-   if ((event->state == PERF_EVENT_STATE_ERROR) ||
-   (event->state == PERF_EVENT_STATE_EXIT))
+   if ((event->state == PERF_EVENT_STATE_ERROR) || (is_event_hup(event)))
return 0;
 
if (count < event->read_size)
@@ -3614,7 +3626,7 @@ static unsigned int perf_poll(struct file *file, 
poll_table *wait)
 
poll_wait(file, >waitq, wait);
 
-   if (event->state == PERF_EVENT_STATE_EXIT)
+   if (is_event_hup(event))
return events;
 
/*
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Jiri Olsa
On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
> On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
> 
> > > The thing is; I don't understand those reasons. That commit log doesn't
> > > explain.
> > 
> > Ah wait, I finally see. I think we want to fix that exit path, not
> > disallow the cloning.
> > 
> > The thing is, by not allowing this optimization simple things like eg.
> > pipe-test say very expensive.
> 
> So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events
> with exited task") that introduces the problem. Before that things would
> work correctly afaict.

hum, I dont think so.. because the perf_remove_from_context set event
to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events
disabled

the PERF_EVENT_STATE_EXIT should be just notification for poll to return HUP

> The exit would remove from the context but leave the event in existence.
> Both the fd and the inherited events would have references to it, only
> once those are gone do we destroy the actual event.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
> On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
> 
> > > The thing is; I don't understand those reasons. That commit log doesn't
> > > explain.
> > 
> > Ah wait, I finally see. I think we want to fix that exit path, not
> > disallow the cloning.
> > 
> > The thing is, by not allowing this optimization simple things like eg.
> > pipe-test say very expensive.
> 
> So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events
> with exited task") that introduces the problem. Before that things would
> work correctly afaict.
> 
> The exit would remove from the context but leave the event in existence.
> Both the fd and the inherited events would have references to it, only
> once those are gone do we destroy the actual event.

I have another 'problem' with 179033b3e064. What if you 'want' to
continue monitoring after the initial task died? Eg. if you want to
monitor crap that unconditionally daemonizes.

So at this point I would suggest we revert both 179033b3e064 and
1f9a7268c67f so that the context switch optimization works again for
simple 2 task things (pipe-test).

If we re-introduce 179033b3e064 (which I think makes sense) we need to
make sure it works with the context switch optimization (we could add
perf_event::task I think, it would clean up a few other things iirc) and
we'd have to make it conditional so we can still monitor daemons.

Or am I totally missing things again?


pgp2O9F2dr1nX.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:

> > The thing is; I don't understand those reasons. That commit log doesn't
> > explain.
> 
> Ah wait, I finally see. I think we want to fix that exit path, not
> disallow the cloning.
> 
> The thing is, by not allowing this optimization simple things like eg.
> pipe-test say very expensive.

So its 179033b3e064 ("perf: Add PERF_EVENT_STATE_EXIT state for events
with exited task") that introduces the problem. Before that things would
work correctly afaict.

The exit would remove from the context but leave the event in existence.
Both the fd and the inherited events would have references to it, only
once those are gone do we destroy the actual event.




pgpQxWGnfhLc1.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 11:45:48AM +0200, Peter Zijlstra wrote:
> On Mon, Sep 08, 2014 at 11:43:48AM +0200, Jiri Olsa wrote:
> > On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote:
> > > On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote:
> > > > As described in commit:
> > > >   1f9a7268c67f perf: Do not allow optimized switch for non-cloned events
> > > > we dont allow optimized switch for non cloned contexts.
> > > > 
> > > > There's no need now to check if one of the context is parent
> > > > of the other in context_equiv function.
> > > > 
> > > 
> > > OK, so talk to me about that patch; it looks like you're slowly
> > > reverting: 5a3126d4fe7c ("perf: Fix the perf context switch
> > > optimization").
> > > 
> > > So what exactly is the problem? I'm still jet-lagged and not seeing it.
> > 
> > the problem is, that we cannot allow non-cloned context
> > to be part of the optimized switch for reasons explained
> > in commit 1f9a7268c67f
> 
> The thing is; I don't understand those reasons. That commit log doesn't
> explain.

Ah wait, I finally see. I think we want to fix that exit path, not
disallow the cloning.

The thing is, by not allowing this optimization simple things like eg.
pipe-test say very expensive.


pgpwhKka6o5ck.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 11:43:48AM +0200, Jiri Olsa wrote:
> On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote:
> > On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote:
> > > As described in commit:
> > >   1f9a7268c67f perf: Do not allow optimized switch for non-cloned events
> > > we dont allow optimized switch for non cloned contexts.
> > > 
> > > There's no need now to check if one of the context is parent
> > > of the other in context_equiv function.
> > > 
> > 
> > OK, so talk to me about that patch; it looks like you're slowly
> > reverting: 5a3126d4fe7c ("perf: Fix the perf context switch
> > optimization").
> > 
> > So what exactly is the problem? I'm still jet-lagged and not seeing it.
> 
> the problem is, that we cannot allow non-cloned context
> to be part of the optimized switch for reasons explained
> in commit 1f9a7268c67f

The thing is; I don't understand those reasons. That commit log doesn't
explain.


pgpkWKICXtVL4.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Jiri Olsa
On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote:
> On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote:
> > As described in commit:
> >   1f9a7268c67f perf: Do not allow optimized switch for non-cloned events
> > we dont allow optimized switch for non cloned contexts.
> > 
> > There's no need now to check if one of the context is parent
> > of the other in context_equiv function.
> > 
> 
> OK, so talk to me about that patch; it looks like you're slowly
> reverting: 5a3126d4fe7c ("perf: Fix the perf context switch
> optimization").
> 
> So what exactly is the problem? I'm still jet-lagged and not seeing it.

the problem is, that we cannot allow non-cloned context
to be part of the optimized switch for reasons explained
in commit 1f9a7268c67f

I did not realize there was a just single commit introducing
non-cloned context switch to revert ;-) attached

I haven't tested it yet.. will do with my other changes

thanks,
jirka


---
This reverts commit 5a3126d4fe7c311fe12f98fef0470f6cb582d1ef.

As explained in commit:
  1f9a7268c67f perf: Do not allow optimized switch for non-cloned events

we cannot allow optimized switch for non cloned contexts. It
suppressed it by forcing the condition for parent contexts.

But what we actually should do is to revert the full commit that
introduced the optimized switch for non-cloned contexts.
This way we get rid of unneeded (now) ctx->generation updates
for non-cloned contexts.
---
 kernel/events/core.c | 64 +++-
 1 file changed, 18 insertions(+), 46 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index d8cb4d21a346..424e6d3b07b7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -916,7 +916,6 @@ static void unclone_ctx(struct perf_event_context *ctx)
put_ctx(ctx->parent_ctx);
ctx->parent_ctx = NULL;
}
-   ctx->generation++;
 }
 
 static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
@@ -1154,8 +1153,6 @@ list_add_event(struct perf_event *event, struct 
perf_event_context *ctx)
ctx->nr_events++;
if (event->attr.inherit_stat)
ctx->nr_stat++;
-
-   ctx->generation++;
 }
 
 /*
@@ -1333,8 +1330,6 @@ list_del_event(struct perf_event *event, struct 
perf_event_context *ctx)
 */
if (event->state > PERF_EVENT_STATE_OFF)
event->state = PERF_EVENT_STATE_OFF;
-
-   ctx->generation++;
 }
 
 static void perf_group_detach(struct perf_event *event)
@@ -2243,38 +2238,22 @@ static void ctx_sched_out(struct perf_event_context 
*ctx,
 }
 
 /*
- * Test whether two contexts are equivalent, i.e. whether they have both been
- * cloned from the same version of the same context.
- *
- * Equivalence is measured using a generation number in the context that is
- * incremented on each modification to it; see unclone_ctx(), list_add_event()
- * and list_del_event().
+ * Test whether two contexts are equivalent, i.e. whether they
+ * have both been cloned from the same version of the same context
+ * and they both have the same number of enabled events.
+ * If the number of enabled events is the same, then the set
+ * of enabled events should be the same, because these are both
+ * inherited contexts, therefore we can't access individual events
+ * in them directly with an fd; we can only enable/disable all
+ * events via prctl, or enable/disable all events in a family
+ * via ioctl, which will have the same effect on both contexts.
  */
 static int context_equiv(struct perf_event_context *ctx1,
 struct perf_event_context *ctx2)
 {
-   /* Pinning disables the swap optimization */
-   if (ctx1->pin_count || ctx2->pin_count)
-   return 0;
-
-   /* If ctx1 is the parent of ctx2 */
-   if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
-   return 1;
-
-   /* If ctx2 is the parent of ctx1 */
-   if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
-   return 1;
-
-   /*
-* If ctx1 and ctx2 have the same parent; we flatten the parent
-* hierarchy, see perf_event_init_context().
-*/
-   if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
-   ctx1->parent_gen == ctx2->parent_gen)
-   return 1;
-
-   /* Unmatched */
-   return 0;
+   return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
+   && ctx1->parent_gen == ctx2->parent_gen
+   && !ctx1->pin_count && !ctx2->pin_count;
 }
 
 static void __perf_event_sync_stat(struct perf_event *event,
@@ -2354,7 +2333,7 @@ static void perf_event_context_sched_out(struct 
task_struct *task, int ctxn,
 {
struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
struct perf_event_context *next_ctx;
-   struct perf_event_context *parent, *next_parent;
+   struct 

Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Jiri Olsa
On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote:
 On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote:
  As described in commit:
1f9a7268c67f perf: Do not allow optimized switch for non-cloned events
  we dont allow optimized switch for non cloned contexts.
  
  There's no need now to check if one of the context is parent
  of the other in context_equiv function.
  
 
 OK, so talk to me about that patch; it looks like you're slowly
 reverting: 5a3126d4fe7c (perf: Fix the perf context switch
 optimization).
 
 So what exactly is the problem? I'm still jet-lagged and not seeing it.

the problem is, that we cannot allow non-cloned context
to be part of the optimized switch for reasons explained
in commit 1f9a7268c67f

I did not realize there was a just single commit introducing
non-cloned context switch to revert ;-) attached

I haven't tested it yet.. will do with my other changes

thanks,
jirka


---
This reverts commit 5a3126d4fe7c311fe12f98fef0470f6cb582d1ef.

As explained in commit:
  1f9a7268c67f perf: Do not allow optimized switch for non-cloned events

we cannot allow optimized switch for non cloned contexts. It
suppressed it by forcing the condition for parent contexts.

But what we actually should do is to revert the full commit that
introduced the optimized switch for non-cloned contexts.
This way we get rid of unneeded (now) ctx-generation updates
for non-cloned contexts.
---
 kernel/events/core.c | 64 +++-
 1 file changed, 18 insertions(+), 46 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index d8cb4d21a346..424e6d3b07b7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -916,7 +916,6 @@ static void unclone_ctx(struct perf_event_context *ctx)
put_ctx(ctx-parent_ctx);
ctx-parent_ctx = NULL;
}
-   ctx-generation++;
 }
 
 static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
@@ -1154,8 +1153,6 @@ list_add_event(struct perf_event *event, struct 
perf_event_context *ctx)
ctx-nr_events++;
if (event-attr.inherit_stat)
ctx-nr_stat++;
-
-   ctx-generation++;
 }
 
 /*
@@ -1333,8 +1330,6 @@ list_del_event(struct perf_event *event, struct 
perf_event_context *ctx)
 */
if (event-state  PERF_EVENT_STATE_OFF)
event-state = PERF_EVENT_STATE_OFF;
-
-   ctx-generation++;
 }
 
 static void perf_group_detach(struct perf_event *event)
@@ -2243,38 +2238,22 @@ static void ctx_sched_out(struct perf_event_context 
*ctx,
 }
 
 /*
- * Test whether two contexts are equivalent, i.e. whether they have both been
- * cloned from the same version of the same context.
- *
- * Equivalence is measured using a generation number in the context that is
- * incremented on each modification to it; see unclone_ctx(), list_add_event()
- * and list_del_event().
+ * Test whether two contexts are equivalent, i.e. whether they
+ * have both been cloned from the same version of the same context
+ * and they both have the same number of enabled events.
+ * If the number of enabled events is the same, then the set
+ * of enabled events should be the same, because these are both
+ * inherited contexts, therefore we can't access individual events
+ * in them directly with an fd; we can only enable/disable all
+ * events via prctl, or enable/disable all events in a family
+ * via ioctl, which will have the same effect on both contexts.
  */
 static int context_equiv(struct perf_event_context *ctx1,
 struct perf_event_context *ctx2)
 {
-   /* Pinning disables the swap optimization */
-   if (ctx1-pin_count || ctx2-pin_count)
-   return 0;
-
-   /* If ctx1 is the parent of ctx2 */
-   if (ctx1 == ctx2-parent_ctx  ctx1-generation == ctx2-parent_gen)
-   return 1;
-
-   /* If ctx2 is the parent of ctx1 */
-   if (ctx1-parent_ctx == ctx2  ctx1-parent_gen == ctx2-generation)
-   return 1;
-
-   /*
-* If ctx1 and ctx2 have the same parent; we flatten the parent
-* hierarchy, see perf_event_init_context().
-*/
-   if (ctx1-parent_ctx  ctx1-parent_ctx == ctx2-parent_ctx 
-   ctx1-parent_gen == ctx2-parent_gen)
-   return 1;
-
-   /* Unmatched */
-   return 0;
+   return ctx1-parent_ctx  ctx1-parent_ctx == ctx2-parent_ctx
+ctx1-parent_gen == ctx2-parent_gen
+!ctx1-pin_count  !ctx2-pin_count;
 }
 
 static void __perf_event_sync_stat(struct perf_event *event,
@@ -2354,7 +2333,7 @@ static void perf_event_context_sched_out(struct 
task_struct *task, int ctxn,
 {
struct perf_event_context *ctx = task-perf_event_ctxp[ctxn];
struct perf_event_context *next_ctx;
-   struct perf_event_context *parent, *next_parent;
+   struct perf_event_context *parent;
struct perf_cpu_context *cpuctx;
int 

Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 11:43:48AM +0200, Jiri Olsa wrote:
 On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote:
  On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote:
   As described in commit:
 1f9a7268c67f perf: Do not allow optimized switch for non-cloned events
   we dont allow optimized switch for non cloned contexts.
   
   There's no need now to check if one of the context is parent
   of the other in context_equiv function.
   
  
  OK, so talk to me about that patch; it looks like you're slowly
  reverting: 5a3126d4fe7c (perf: Fix the perf context switch
  optimization).
  
  So what exactly is the problem? I'm still jet-lagged and not seeing it.
 
 the problem is, that we cannot allow non-cloned context
 to be part of the optimized switch for reasons explained
 in commit 1f9a7268c67f

The thing is; I don't understand those reasons. That commit log doesn't
explain.


pgpkWKICXtVL4.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 11:45:48AM +0200, Peter Zijlstra wrote:
 On Mon, Sep 08, 2014 at 11:43:48AM +0200, Jiri Olsa wrote:
  On Tue, Sep 02, 2014 at 12:50:36PM +0200, Peter Zijlstra wrote:
   On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote:
As described in commit:
  1f9a7268c67f perf: Do not allow optimized switch for non-cloned events
we dont allow optimized switch for non cloned contexts.

There's no need now to check if one of the context is parent
of the other in context_equiv function.

   
   OK, so talk to me about that patch; it looks like you're slowly
   reverting: 5a3126d4fe7c (perf: Fix the perf context switch
   optimization).
   
   So what exactly is the problem? I'm still jet-lagged and not seeing it.
  
  the problem is, that we cannot allow non-cloned context
  to be part of the optimized switch for reasons explained
  in commit 1f9a7268c67f
 
 The thing is; I don't understand those reasons. That commit log doesn't
 explain.

Ah wait, I finally see. I think we want to fix that exit path, not
disallow the cloning.

The thing is, by not allowing this optimization simple things like eg.
pipe-test say very expensive.


pgpwhKka6o5ck.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:

  The thing is; I don't understand those reasons. That commit log doesn't
  explain.
 
 Ah wait, I finally see. I think we want to fix that exit path, not
 disallow the cloning.
 
 The thing is, by not allowing this optimization simple things like eg.
 pipe-test say very expensive.

So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events
with exited task) that introduces the problem. Before that things would
work correctly afaict.

The exit would remove from the context but leave the event in existence.
Both the fd and the inherited events would have references to it, only
once those are gone do we destroy the actual event.




pgpQxWGnfhLc1.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
 On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
 
   The thing is; I don't understand those reasons. That commit log doesn't
   explain.
  
  Ah wait, I finally see. I think we want to fix that exit path, not
  disallow the cloning.
  
  The thing is, by not allowing this optimization simple things like eg.
  pipe-test say very expensive.
 
 So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events
 with exited task) that introduces the problem. Before that things would
 work correctly afaict.
 
 The exit would remove from the context but leave the event in existence.
 Both the fd and the inherited events would have references to it, only
 once those are gone do we destroy the actual event.

I have another 'problem' with 179033b3e064. What if you 'want' to
continue monitoring after the initial task died? Eg. if you want to
monitor crap that unconditionally daemonizes.

So at this point I would suggest we revert both 179033b3e064 and
1f9a7268c67f so that the context switch optimization works again for
simple 2 task things (pipe-test).

If we re-introduce 179033b3e064 (which I think makes sense) we need to
make sure it works with the context switch optimization (we could add
perf_event::task I think, it would clean up a few other things iirc) and
we'd have to make it conditional so we can still monitor daemons.

Or am I totally missing things again?


pgp2O9F2dr1nX.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Jiri Olsa
On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
 On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
 
   The thing is; I don't understand those reasons. That commit log doesn't
   explain.
  
  Ah wait, I finally see. I think we want to fix that exit path, not
  disallow the cloning.
  
  The thing is, by not allowing this optimization simple things like eg.
  pipe-test say very expensive.
 
 So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events
 with exited task) that introduces the problem. Before that things would
 work correctly afaict.

hum, I dont think so.. because the perf_remove_from_context set event
to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events
disabled

the PERF_EVENT_STATE_EXIT should be just notification for poll to return HUP

 The exit would remove from the context but leave the event in existence.
 Both the fd and the inherited events would have references to it, only
 once those are gone do we destroy the actual event.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Jiri Olsa
On Mon, Sep 08, 2014 at 01:39:58PM +0200, Peter Zijlstra wrote:
 On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
  On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
  
The thing is; I don't understand those reasons. That commit log doesn't
explain.
   
   Ah wait, I finally see. I think we want to fix that exit path, not
   disallow the cloning.
   
   The thing is, by not allowing this optimization simple things like eg.
   pipe-test say very expensive.
  
  So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events
  with exited task) that introduces the problem. Before that things would
  work correctly afaict.
  
  The exit would remove from the context but leave the event in existence.
  Both the fd and the inherited events would have references to it, only
  once those are gone do we destroy the actual event.
 
 I have another 'problem' with 179033b3e064. What if you 'want' to
 continue monitoring after the initial task died? Eg. if you want to
 monitor crap that unconditionally daemonizes.

right.. did not think of that.. need to check more, but
seems like just the check for children should be enough

jirka


---
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bf482ccbdbe1..341d0b47ca14 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3568,6 +3568,19 @@ static int perf_event_read_one(struct perf_event *event,
return n * sizeof(u64);
 }
 
+static bool is_event_hup(struct perf_event *event)
+{
+   bool no_children;
+
+   if (event-state != PERF_EVENT_STATE_EXIT)
+   return false;
+
+   mutex_lock(event-child_mutex);
+   no_children = list_empty(event-child_list);
+   mutex_unlock(event-child_mutex);
+   return no_children;
+}
+
 /*
  * Read the performance event - simple non blocking version for now
  */
@@ -3582,8 +3595,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, 
size_t count)
 * error state (i.e. because it was pinned but it couldn't be
 * scheduled on to the CPU at some point).
 */
-   if ((event-state == PERF_EVENT_STATE_ERROR) ||
-   (event-state == PERF_EVENT_STATE_EXIT))
+   if ((event-state == PERF_EVENT_STATE_ERROR) || (is_event_hup(event)))
return 0;
 
if (count  event-read_size)
@@ -3614,7 +3626,7 @@ static unsigned int perf_poll(struct file *file, 
poll_table *wait)
 
poll_wait(file, event-waitq, wait);
 
-   if (event-state == PERF_EVENT_STATE_EXIT)
+   if (is_event_hup(event))
return events;
 
/*
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Jiri Olsa
On Mon, Sep 08, 2014 at 01:39:58PM +0200, Peter Zijlstra wrote:
 On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
  On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
  
The thing is; I don't understand those reasons. That commit log doesn't
explain.
   
   Ah wait, I finally see. I think we want to fix that exit path, not
   disallow the cloning.
   
   The thing is, by not allowing this optimization simple things like eg.
   pipe-test say very expensive.
  
  So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events
  with exited task) that introduces the problem. Before that things would
  work correctly afaict.
  
  The exit would remove from the context but leave the event in existence.
  Both the fd and the inherited events would have references to it, only
  once those are gone do we destroy the actual event.
 
 I have another 'problem' with 179033b3e064. What if you 'want' to
 continue monitoring after the initial task died? Eg. if you want to
 monitor crap that unconditionally daemonizes.
 
 So at this point I would suggest we revert both 179033b3e064 and
 1f9a7268c67f so that the context switch optimization works again for
 simple 2 task things (pipe-test).
 
 If we re-introduce 179033b3e064 (which I think makes sense) we need to
 make sure it works with the context switch optimization (we could add
 perf_event::task I think, it would clean up a few other things iirc) and
 we'd have to make it conditional so we can still monitor daemons.
 
 Or am I totally missing things again?

as I said in previous email I dont think the 179033b3e064 introduced
the issue and I think we should keep 1f9a7268c67f to have correct
numbers ;-)

I'll think about the fix to keep non-cloned context in optimized switch

jirka
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 02:19:52PM +0200, Jiri Olsa wrote:
  I have another 'problem' with 179033b3e064. What if you 'want' to
  continue monitoring after the initial task died? Eg. if you want to
  monitor crap that unconditionally daemonizes.
 
 right.. did not think of that.. need to check more, but
 seems like just the check for children should be enough
 

Indeed, that should work.

 ---
 diff --git a/kernel/events/core.c b/kernel/events/core.c
 index bf482ccbdbe1..341d0b47ca14 100644
 --- a/kernel/events/core.c
 +++ b/kernel/events/core.c
 @@ -3568,6 +3568,19 @@ static int perf_event_read_one(struct perf_event 
 *event,
   return n * sizeof(u64);
  }
  
 +static bool is_event_hup(struct perf_event *event)
 +{
 + bool no_children;
 +
 + if (event-state != PERF_EVENT_STATE_EXIT)
 + return false;
 +
 + mutex_lock(event-child_mutex);
 + no_children = list_empty(event-child_list);
 + mutex_unlock(event-child_mutex);
 + return no_children;
 +}
 +
  /*
   * Read the performance event - simple non blocking version for now
   */
 @@ -3582,8 +3595,7 @@ perf_read_hw(struct perf_event *event, char __user 
 *buf, size_t count)
* error state (i.e. because it was pinned but it couldn't be
* scheduled on to the CPU at some point).
*/
 - if ((event-state == PERF_EVENT_STATE_ERROR) ||
 - (event-state == PERF_EVENT_STATE_EXIT))
 + if ((event-state == PERF_EVENT_STATE_ERROR) || (is_event_hup(event)))
   return 0;

Do we want this? It seems like a fairly sensible thing to start a
counter and wait for the thing to die, only to then read the total
count. But with this on we get 0s.

I suppose Stephane's email got to you after you did this and we should
be dropping this thing entirely?

   if (count  event-read_size)
 @@ -3614,7 +3626,7 @@ static unsigned int perf_poll(struct file *file, 
 poll_table *wait)
  
   poll_wait(file, event-waitq, wait);
  
 - if (event-state == PERF_EVENT_STATE_EXIT)
 + if (is_event_hup(event))
   return events;
  
   /*


pgpBkCjUsbHZz.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote:
 On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
  On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
  
The thing is; I don't understand those reasons. That commit log doesn't
explain.
   
   Ah wait, I finally see. I think we want to fix that exit path, not
   disallow the cloning.
   
   The thing is, by not allowing this optimization simple things like eg.
   pipe-test say very expensive.
  
  So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events
  with exited task) that introduces the problem. Before that things would
  work correctly afaict.
 
 hum, I dont think so.. because the perf_remove_from_context set event
 to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events
 disabled

Urgh, see I knew I was missing something.

Can't we fix that? Lemme check to see what relies on this.


pgpPkoqTrGmCQ.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Peter Zijlstra
On Mon, Sep 08, 2014 at 03:34:28PM +0200, Peter Zijlstra wrote:
 On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote:
  On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
   On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:
   
 The thing is; I don't understand those reasons. That commit log 
 doesn't
 explain.

Ah wait, I finally see. I think we want to fix that exit path, not
disallow the cloning.

The thing is, by not allowing this optimization simple things like eg.
pipe-test say very expensive.
   
   So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events
   with exited task) that introduces the problem. Before that things would
   work correctly afaict.
  
  hum, I dont think so.. because the perf_remove_from_context set event
  to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events
  disabled
 
 Urgh, see I knew I was missing something.
 
 Can't we fix that? Lemme check to see what relies on this.

2e2af50b1fab (perf_events: Disable events when we detach them)

Seems to be about it. And I think we should solve that differently, but
the best I can come up with ties into the event-ctx mess we have in
that other thread.

The thing is, IOC_ENABLE/DISABLE and read() and such should act
(sanely and) independent from the attached state.

Its just that the whole event-ctx migration mess is making this
somewhat hard atm.

So things like perf_event_read() should not only check ctx-is_active
but also worry about event-attach_state  PERF_ATTACH_CONTEXT.

Now the biggest problem is that we cannot tell if its a temporary state
(move_group / migrate_context) or permanent (exit)... 

Urgh


pgpp92w5dLyFX.pgp
Description: PGP signature


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-08 Thread Jiri Olsa
On Mon, Sep 08, 2014 at 05:13:05PM +0200, Peter Zijlstra wrote:
 On Mon, Sep 08, 2014 at 03:34:28PM +0200, Peter Zijlstra wrote:
  On Mon, Sep 08, 2014 at 02:01:41PM +0200, Jiri Olsa wrote:
   On Mon, Sep 08, 2014 at 12:01:22PM +0200, Peter Zijlstra wrote:
On Mon, Sep 08, 2014 at 11:48:55AM +0200, Peter Zijlstra wrote:

  The thing is; I don't understand those reasons. That commit log 
  doesn't
  explain.
 
 Ah wait, I finally see. I think we want to fix that exit path, not
 disallow the cloning.
 
 The thing is, by not allowing this optimization simple things like eg.
 pipe-test say very expensive.

So its 179033b3e064 (perf: Add PERF_EVENT_STATE_EXIT state for events
with exited task) that introduces the problem. Before that things would
work correctly afaict.
   
   hum, I dont think so.. because the perf_remove_from_context set event
   to PERF_EVENT_STATE_OFF state anyway.. thus making any new cloned events
   disabled
  
  Urgh, see I knew I was missing something.
  
  Can't we fix that? Lemme check to see what relies on this.
 
 2e2af50b1fab (perf_events: Disable events when we detach them)
 
 Seems to be about it. And I think we should solve that differently, but
 the best I can come up with ties into the event-ctx mess we have in
 that other thread.
 
 The thing is, IOC_ENABLE/DISABLE and read() and such should act
 (sanely and) independent from the attached state.
 
 Its just that the whole event-ctx migration mess is making this
 somewhat hard atm.
 
 So things like perf_event_read() should not only check ctx-is_active
 but also worry about event-attach_state  PERF_ATTACH_CONTEXT.
 
 Now the biggest problem is that we cannot tell if its a temporary state
 (move_group / migrate_context) or permanent (exit)... 
 
 Urgh

I just noticed that we initialize the child state with base parent
state not the real (immediate) parent.. which is what we want IMO

I wonder attached patch could fix the issue mentioned in:
  1f9a726 perf: Do not allow optimized switch for non-cloned events

now I need to recall what I used to test this ;-)

jirka


---
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e4d6924..561a4ea 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7794,6 +7794,7 @@ inherit_event(struct perf_event *parent_event,
  struct perf_event *group_leader,
  struct perf_event_context *child_ctx)
 {
+   enum perf_event_active_state parent_state = parent_event-state;
struct perf_event *child_event;
unsigned long flags;
 
@@ -7827,7 +7828,7 @@ inherit_event(struct perf_event *parent_event,
 * not its attr.disabled bit.  We hold the parent's mutex,
 * so we won't race with perf_event_{en, dis}able_family.
 */
-   if (parent_event-state = PERF_EVENT_STATE_INACTIVE)
+   if (parent_state = PERF_EVENT_STATE_INACTIVE)
child_event-state = PERF_EVENT_STATE_INACTIVE;
else
child_event-state = PERF_EVENT_STATE_OFF;
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-02 Thread Peter Zijlstra
On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote:
> As described in commit:
>   1f9a7268c67f perf: Do not allow optimized switch for non-cloned events
> we dont allow optimized switch for non cloned contexts.
> 
> There's no need now to check if one of the context is parent
> of the other in context_equiv function.
> 

OK, so talk to me about that patch; it looks like you're slowly
reverting: 5a3126d4fe7c ("perf: Fix the perf context switch
optimization").

So what exactly is the problem? I'm still jet-lagged and not seeing it.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-09-02 Thread Peter Zijlstra
On Mon, Aug 25, 2014 at 04:45:35PM +0200, Jiri Olsa wrote:
 As described in commit:
   1f9a7268c67f perf: Do not allow optimized switch for non-cloned events
 we dont allow optimized switch for non cloned contexts.
 
 There's no need now to check if one of the context is parent
 of the other in context_equiv function.
 

OK, so talk to me about that patch; it looks like you're slowly
reverting: 5a3126d4fe7c (perf: Fix the perf context switch
optimization).

So what exactly is the problem? I'm still jet-lagged and not seeing it.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-08-25 Thread Jiri Olsa
As described in commit:
  1f9a7268c67f perf: Do not allow optimized switch for non-cloned events
we dont allow optimized switch for non cloned contexts.

There's no need now to check if one of the context is parent
of the other in context_equiv function.

Cc: Andi Kleen 
Cc: Arnaldo Carvalho de Melo 
Cc: Corey Ashford 
Cc: David Ahern 
Cc: Frederic Weisbecker 
Cc: Ingo Molnar 
Cc: Jen-Cheng(Tommy) Huang 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Stephane Eranian 
Signed-off-by: Jiri Olsa 
---
 kernel/events/core.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index d8cb4d21a346..4ad4ba2bc106 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2257,14 +2257,6 @@ static int context_equiv(struct perf_event_context *ctx1,
if (ctx1->pin_count || ctx2->pin_count)
return 0;
 
-   /* If ctx1 is the parent of ctx2 */
-   if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
-   return 1;
-
-   /* If ctx2 is the parent of ctx1 */
-   if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
-   return 1;
-
/*
 * If ctx1 and ctx2 have the same parent; we flatten the parent
 * hierarchy, see perf_event_init_context().
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/9] perf: Remove redundant parent context check from context_equiv

2014-08-25 Thread Jiri Olsa
As described in commit:
  1f9a7268c67f perf: Do not allow optimized switch for non-cloned events
we dont allow optimized switch for non cloned contexts.

There's no need now to check if one of the context is parent
of the other in context_equiv function.

Cc: Andi Kleen a...@firstfloor.org
Cc: Arnaldo Carvalho de Melo a...@kernel.org
Cc: Corey Ashford cjash...@linux.vnet.ibm.com
Cc: David Ahern dsah...@gmail.com
Cc: Frederic Weisbecker fweis...@gmail.com
Cc: Ingo Molnar mi...@kernel.org
Cc: Jen-Cheng(Tommy) Huang tomm...@gatech.edu
Cc: Namhyung Kim namhy...@kernel.org
Cc: Paul Mackerras pau...@samba.org
Cc: Peter Zijlstra a.p.zijls...@chello.nl
Cc: Stephane Eranian eran...@google.com
Signed-off-by: Jiri Olsa jo...@kernel.org
---
 kernel/events/core.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index d8cb4d21a346..4ad4ba2bc106 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2257,14 +2257,6 @@ static int context_equiv(struct perf_event_context *ctx1,
if (ctx1-pin_count || ctx2-pin_count)
return 0;
 
-   /* If ctx1 is the parent of ctx2 */
-   if (ctx1 == ctx2-parent_ctx  ctx1-generation == ctx2-parent_gen)
-   return 1;
-
-   /* If ctx2 is the parent of ctx1 */
-   if (ctx1-parent_ctx == ctx2  ctx1-parent_gen == ctx2-generation)
-   return 1;
-
/*
 * If ctx1 and ctx2 have the same parent; we flatten the parent
 * hierarchy, see perf_event_init_context().
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/