Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-17 Thread Ingo Molnar

* Jason Low  wrote:

> On Thu, 2015-04-16 at 20:24 +0200, Ingo Molnar wrote:
> > Would it make sense to add a few comments to the seq field definition 
> > site(s), about how it's supposed to be accessed - or to the 
> > READ_ONCE()/WRITE_ONCE() sites, to keep people from wondering?
> 
> How about this:
> 
> ---
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 5a44371..63fa87f 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1794,6 +1794,11 @@ static void task_numa_placement(struct task_struct *p)
>   u64 runtime, period;
>   spinlock_t *group_lock = NULL;
>  
> + /*
> +  * The p->mm->numa_scan_seq gets updated without
> +  * exclusive access. Use READ_ONCE() here to ensure
> +  * that the field is read in a single access.
> +  */
>   seq = READ_ONCE(p->mm->numa_scan_seq);
>   if (p->numa_scan_seq == seq)
>   return;
> @@ -2107,6 +2112,13 @@ void task_numa_fault(int last_cpupid, int mem_node, 
> int pages, int flags)
>  
>  static void reset_ptenuma_scan(struct task_struct *p)
>  {
> + /*
> +  * We only did a read acquisition of the mmap sem, so
> +  * p->mm->numa_scan_seq is written to without exclusive access.
> +  * That's not much of an issue though, since this is just used
> +  * for statistical sampling. Use WRITE_ONCE and READ_ONCE, which
> +  * are not expensive, to avoid load/store tearing.
> +  */
>   WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
>   p->mm->numa_scan_offset = 0;
>  }

Perfect! It just needs a changelog and a SOB.

Thanks,

Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-17 Thread Ingo Molnar

* Jason Low jason.l...@hp.com wrote:

 On Thu, 2015-04-16 at 20:24 +0200, Ingo Molnar wrote:
  Would it make sense to add a few comments to the seq field definition 
  site(s), about how it's supposed to be accessed - or to the 
  READ_ONCE()/WRITE_ONCE() sites, to keep people from wondering?
 
 How about this:
 
 ---
 diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
 index 5a44371..63fa87f 100644
 --- a/kernel/sched/fair.c
 +++ b/kernel/sched/fair.c
 @@ -1794,6 +1794,11 @@ static void task_numa_placement(struct task_struct *p)
   u64 runtime, period;
   spinlock_t *group_lock = NULL;
  
 + /*
 +  * The p-mm-numa_scan_seq gets updated without
 +  * exclusive access. Use READ_ONCE() here to ensure
 +  * that the field is read in a single access.
 +  */
   seq = READ_ONCE(p-mm-numa_scan_seq);
   if (p-numa_scan_seq == seq)
   return;
 @@ -2107,6 +2112,13 @@ void task_numa_fault(int last_cpupid, int mem_node, 
 int pages, int flags)
  
  static void reset_ptenuma_scan(struct task_struct *p)
  {
 + /*
 +  * We only did a read acquisition of the mmap sem, so
 +  * p-mm-numa_scan_seq is written to without exclusive access.
 +  * That's not much of an issue though, since this is just used
 +  * for statistical sampling. Use WRITE_ONCE and READ_ONCE, which
 +  * are not expensive, to avoid load/store tearing.
 +  */
   WRITE_ONCE(p-mm-numa_scan_seq, READ_ONCE(p-mm-numa_scan_seq) + 1);
   p-mm-numa_scan_offset = 0;
  }

Perfect! It just needs a changelog and a SOB.

Thanks,

Ingo
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Jason Low
On Thu, 2015-04-16 at 20:24 +0200, Ingo Molnar wrote:
> Would it make sense to add a few comments to the seq field definition 
> site(s), about how it's supposed to be accessed - or to the 
> READ_ONCE()/WRITE_ONCE() sites, to keep people from wondering?

How about this:

---
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5a44371..63fa87f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1794,6 +1794,11 @@ static void task_numa_placement(struct task_struct *p)
u64 runtime, period;
spinlock_t *group_lock = NULL;
 
+   /*
+* The p->mm->numa_scan_seq gets updated without
+* exclusive access. Use READ_ONCE() here to ensure
+* that the field is read in a single access.
+*/
seq = READ_ONCE(p->mm->numa_scan_seq);
if (p->numa_scan_seq == seq)
return;
@@ -2107,6 +2112,13 @@ void task_numa_fault(int last_cpupid, int mem_node, int 
pages, int flags)
 
 static void reset_ptenuma_scan(struct task_struct *p)
 {
+   /*
+* We only did a read acquisition of the mmap sem, so
+* p->mm->numa_scan_seq is written to without exclusive access.
+* That's not much of an issue though, since this is just used
+* for statistical sampling. Use WRITE_ONCE and READ_ONCE, which
+* are not expensive, to avoid load/store tearing.
+*/
WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
p->mm->numa_scan_offset = 0;
 }


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Jason Low
On Thu, 2015-04-16 at 20:15 +0200, Peter Zijlstra wrote:
> On Thu, Apr 16, 2015 at 08:02:27PM +0200, Ingo Molnar wrote:
> > > ACCESS_ONCE() is not a compiler barrier
> > 
> > It's not a general compiler barrier (and I didn't claim so) but it is 
> > still a compiler barrier: it's documented as a weak, variable specific 
> > barrier in Documentation/memor-barriers.txt:
> 
> Ok, fair enough. I just don't generally think of them as 'barriers'.
> 
> > > The 'read' side uses ACCESS_ONCE() for two purposes:
> > >  - to load the value once, we don't want the seq number to change under
> > >us for obvious reasons
> > >  - to avoid load tearing and observe weird seq numbers
> > > 
> > > The update side uses ACCESS_ONCE() to avoid write tearing, and 
> > > strictly speaking it should also worry about read-tearing since its 
> > > not hard serialized, although its very unlikely to actually have 
> > > concurrency (IIRC).
> 
> > This is what I meant by that there's no harm from this race.
> 
> Ok, but I would still like to preserve the READ one on the usage site
> and the WRITE one on the update side, if only as documentation that
> there's something 'special' happening.

In that case, in our patch 2, I suppose we also want to use READ_ONCE()
when accessing the running field, which also helps document that we're
reading and writing to a non-atomic value that gets accessed without a
lock.

> And while the effects here might end up being statistical noise, I have
> conceptual problems with re-reading seq counts, that's not proper.
> 
> And its not like they really cost anything.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Paul E. McKenney
On Thu, Apr 16, 2015 at 09:02:08PM +0200, Peter Zijlstra wrote:
> On Thu, Apr 16, 2015 at 08:24:27PM +0200, Ingo Molnar wrote:
> > Yes ... but that still leaves this weird feeling that it's really 
> > still a bit wrong because it's not proper parallel code, we just 
> > reduced the probability of the remaining races radically. And it's not 
> > like GCC (or any compiler) does load tearing or even store tearing 
> > under normal -O2 for such code patterns, right?
> 
> I think Paul once caught GCC doing something silly, but typically no.
> The re-loads however have been frequently observed.

Too true!

Some architectures do split stores of constants.  For example, given
an architecture with a store-immediate instruction with (say) a four-bit 
immediate field, gcc can compile this:

x = 0x00020008;

to something like:

st $2, (x+2)
st $8, (x)

And gcc was doing this even though the store to x had volatile semantics,
a bug which has thankfully since been fixed.

But then again, I am paranoid.  So I would not put it past gcc to think
to itself "Hmmm...  I just loaded x a few instructions back, and only
clobbered the low-order byte.  So I will just reload that byte into
low-order byte of the register containing the remnants of the previous
load."

No, I have never seen gcc do that, but a C compiler could do that and
still claim to be complying with the standard.  :-/

Thanx, Paul

> > > And its not like they really cost anything.
> > 
> > That's true.
> > 
> > Would it make sense to add a few comments to the seq field definition 
> > site(s), about how it's supposed to be accessed - or to the 
> > READ_ONCE()/WRITE_ONCE() sites, to keep people from wondering?
> 
> For sure, can do a comment no problem.
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Peter Zijlstra
On Thu, Apr 16, 2015 at 08:24:27PM +0200, Ingo Molnar wrote:
> Yes ... but that still leaves this weird feeling that it's really 
> still a bit wrong because it's not proper parallel code, we just 
> reduced the probability of the remaining races radically. And it's not 
> like GCC (or any compiler) does load tearing or even store tearing 
> under normal -O2 for such code patterns, right?

I think Paul once caught GCC doing something silly, but typically no.
The re-loads however have been frequently observed.

> > And its not like they really cost anything.
> 
> That's true.
> 
> Would it make sense to add a few comments to the seq field definition 
> site(s), about how it's supposed to be accessed - or to the 
> READ_ONCE()/WRITE_ONCE() sites, to keep people from wondering?

For sure, can do a comment no problem.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Ingo Molnar

* Peter Zijlstra  wrote:

> On Thu, Apr 16, 2015 at 08:02:27PM +0200, Ingo Molnar wrote:
> > > ACCESS_ONCE() is not a compiler barrier
> > 
> > It's not a general compiler barrier (and I didn't claim so) but it is 
> > still a compiler barrier: it's documented as a weak, variable specific 
> > barrier in Documentation/memor-barriers.txt:
> 
> Ok, fair enough. I just don't generally think of them as 'barriers'.
> 
> > > The 'read' side uses ACCESS_ONCE() for two purposes:
> > >  - to load the value once, we don't want the seq number to change under
> > >us for obvious reasons
> > >  - to avoid load tearing and observe weird seq numbers
> > > 
> > > The update side uses ACCESS_ONCE() to avoid write tearing, and 
> > > strictly speaking it should also worry about read-tearing since its 
> > > not hard serialized, although its very unlikely to actually have 
> > > concurrency (IIRC).
> 
> > This is what I meant by that there's no harm from this race.
> 
> Ok, but I would still like to preserve the READ one on the usage 
> site and the WRITE one on the update side, if only as documentation 
> that there's something 'special' happening.
> 
> And while the effects here might end up being statistical noise, I 
> have conceptual problems with re-reading seq counts, that's not 
> proper.

Yes ... but that still leaves this weird feeling that it's really 
still a bit wrong because it's not proper parallel code, we just 
reduced the probability of the remaining races radically. And it's not 
like GCC (or any compiler) does load tearing or even store tearing 
under normal -O2 for such code patterns, right?

> 
> And its not like they really cost anything.

That's true.

Would it make sense to add a few comments to the seq field definition 
site(s), about how it's supposed to be accessed - or to the 
READ_ONCE()/WRITE_ONCE() sites, to keep people from wondering?

Thanks,

Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Peter Zijlstra
On Thu, Apr 16, 2015 at 08:02:27PM +0200, Ingo Molnar wrote:
> > ACCESS_ONCE() is not a compiler barrier
> 
> It's not a general compiler barrier (and I didn't claim so) but it is 
> still a compiler barrier: it's documented as a weak, variable specific 
> barrier in Documentation/memor-barriers.txt:

Ok, fair enough. I just don't generally think of them as 'barriers'.

> > The 'read' side uses ACCESS_ONCE() for two purposes:
> >  - to load the value once, we don't want the seq number to change under
> >us for obvious reasons
> >  - to avoid load tearing and observe weird seq numbers
> > 
> > The update side uses ACCESS_ONCE() to avoid write tearing, and 
> > strictly speaking it should also worry about read-tearing since its 
> > not hard serialized, although its very unlikely to actually have 
> > concurrency (IIRC).

> This is what I meant by that there's no harm from this race.

Ok, but I would still like to preserve the READ one on the usage site
and the WRITE one on the update side, if only as documentation that
there's something 'special' happening.

And while the effects here might end up being statistical noise, I have
conceptual problems with re-reading seq counts, that's not proper.

And its not like they really cost anything.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Ingo Molnar

* Peter Zijlstra  wrote:

> On Wed, Apr 15, 2015 at 09:46:01AM +0200, Ingo Molnar wrote:
> 
>  > @@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int mem_node, 
> int pages, int flags)
>  >  
>  >  static void reset_ptenuma_scan(struct task_struct *p)
>  >  {
>  > -  ACCESS_ONCE(p->mm->numa_scan_seq)++;
>  > +  WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
>  
> vs
> 
>   seq = ACCESS_ONCE(p->mm->numa_scan_seq);
>   if (p->numa_scan_seq == seq)
>   return;
>   p->numa_scan_seq = seq;
> 
> 
> > So the original ACCESS_ONCE() barriers were misguided to begin with: I 
> > think they tried to handle races with the scheduler balancing softirq 
> > and tried to avoid having to use atomics for the sequence counter 
> > (which would be overkill), but things like ACCESS_ONCE(x)++ never 
> > guaranteed atomicity (or even coherency) of the update.
> > 
> > But since in reality this is only statistical sampling code, all these 
> > compiler barriers can be removed I think. Peter, Mel, Rik, do you 
> > agree?
> 
> ACCESS_ONCE() is not a compiler barrier

It's not a general compiler barrier (and I didn't claim so) but it is 
still a compiler barrier: it's documented as a weak, variable specific 
barrier in Documentation/memor-barriers.txt:

  COMPILER BARRIER
  

  The Linux kernel has an explicit compiler barrier function that  prevents the
  compiler from moving the memory accesses either side of it to the  other side:

barrier();

  This is a general barrier -- there are no read-read or write-write variants
  of barrier().  However, ACCESS_ONCE() can be thought of as a weak form
  for barrier() that affects only the specific accesses flagged by the
  ACCESS_ONCE().

 [...]

> The 'read' side uses ACCESS_ONCE() for two purposes:
>  - to load the value once, we don't want the seq number to change under
>us for obvious reasons
>  - to avoid load tearing and observe weird seq numbers
> 
> The update side uses ACCESS_ONCE() to avoid write tearing, and 
> strictly speaking it should also worry about read-tearing since its 
> not hard serialized, although its very unlikely to actually have 
> concurrency (IIRC).

So what bad effects can there be from the very unlikely read and write 
tearing?

AFAICS nothing particularly bad. On the read side:

seq = ACCESS_ONCE(p->mm->numa_scan_seq);
if (p->numa_scan_seq == seq)
return;
p->numa_scan_seq = seq;

If p->mm->numa_scan_seq gets loaded twice (very unlikely), and two 
different values happen, then we might get a 'double' NUMA placement 
run - i.e. statistical noise.

On the update side:

ACCESS_ONCE(p->mm->numa_scan_seq)++;
p->mm->numa_scan_offset = 0;

If the compiler tears that up we might skip an update - again 
statistical noise at worst.

Nor is compiler tearing the only theoretical worry here: in theory, 
with long cache coherency latencies we might get two updates 'mixed 
up' and resulting in a (single) missed update.

Only atomics would solve all the races, but I think that would be 
overdoing it.

This is what I meant by that there's no harm from this race.

Thanks,

Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Peter Zijlstra
On Wed, Apr 15, 2015 at 09:46:01AM +0200, Ingo Molnar wrote:

 > @@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int mem_node, 
 > int pages, int flags)
 >  
 >  static void reset_ptenuma_scan(struct task_struct *p)
 >  {
 > -ACCESS_ONCE(p->mm->numa_scan_seq)++;
 > +WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
 
vs

seq = ACCESS_ONCE(p->mm->numa_scan_seq);
if (p->numa_scan_seq == seq)
return;
p->numa_scan_seq = seq;


> So the original ACCESS_ONCE() barriers were misguided to begin with: I 
> think they tried to handle races with the scheduler balancing softirq 
> and tried to avoid having to use atomics for the sequence counter 
> (which would be overkill), but things like ACCESS_ONCE(x)++ never 
> guaranteed atomicity (or even coherency) of the update.
> 
> But since in reality this is only statistical sampling code, all these 
> compiler barriers can be removed I think. Peter, Mel, Rik, do you 
> agree?

ACCESS_ONCE() is not a compiler barrier

The 'read' side uses ACCESS_ONCE() for two purposes:
 - to load the value once, we don't want the seq number to change under
   us for obvious reasons
 - to avoid load tearing and observe weird seq numbers

The update side uses ACCESS_ONCE() to avoid write tearing, and strictly
speaking it should also worry about read-tearing since its not hard
serialized, although its very unlikely to actually have concurrency
(IIRC).
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Peter Zijlstra
On Wed, Apr 15, 2015 at 09:46:01AM +0200, Ingo Molnar wrote:

  @@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int mem_node, 
  int pages, int flags)
   
   static void reset_ptenuma_scan(struct task_struct *p)
   {
  -ACCESS_ONCE(p-mm-numa_scan_seq)++;
  +WRITE_ONCE(p-mm-numa_scan_seq, READ_ONCE(p-mm-numa_scan_seq) + 1);
 
vs

seq = ACCESS_ONCE(p-mm-numa_scan_seq);
if (p-numa_scan_seq == seq)
return;
p-numa_scan_seq = seq;


 So the original ACCESS_ONCE() barriers were misguided to begin with: I 
 think they tried to handle races with the scheduler balancing softirq 
 and tried to avoid having to use atomics for the sequence counter 
 (which would be overkill), but things like ACCESS_ONCE(x)++ never 
 guaranteed atomicity (or even coherency) of the update.
 
 But since in reality this is only statistical sampling code, all these 
 compiler barriers can be removed I think. Peter, Mel, Rik, do you 
 agree?

ACCESS_ONCE() is not a compiler barrier

The 'read' side uses ACCESS_ONCE() for two purposes:
 - to load the value once, we don't want the seq number to change under
   us for obvious reasons
 - to avoid load tearing and observe weird seq numbers

The update side uses ACCESS_ONCE() to avoid write tearing, and strictly
speaking it should also worry about read-tearing since its not hard
serialized, although its very unlikely to actually have concurrency
(IIRC).
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Ingo Molnar

* Peter Zijlstra pet...@infradead.org wrote:

 On Wed, Apr 15, 2015 at 09:46:01AM +0200, Ingo Molnar wrote:
 
   @@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int mem_node, 
 int pages, int flags)

static void reset_ptenuma_scan(struct task_struct *p)
{
   -  ACCESS_ONCE(p-mm-numa_scan_seq)++;
   +  WRITE_ONCE(p-mm-numa_scan_seq, READ_ONCE(p-mm-numa_scan_seq) + 1);
  
 vs
 
   seq = ACCESS_ONCE(p-mm-numa_scan_seq);
   if (p-numa_scan_seq == seq)
   return;
   p-numa_scan_seq = seq;
 
 
  So the original ACCESS_ONCE() barriers were misguided to begin with: I 
  think they tried to handle races with the scheduler balancing softirq 
  and tried to avoid having to use atomics for the sequence counter 
  (which would be overkill), but things like ACCESS_ONCE(x)++ never 
  guaranteed atomicity (or even coherency) of the update.
  
  But since in reality this is only statistical sampling code, all these 
  compiler barriers can be removed I think. Peter, Mel, Rik, do you 
  agree?
 
 ACCESS_ONCE() is not a compiler barrier

It's not a general compiler barrier (and I didn't claim so) but it is 
still a compiler barrier: it's documented as a weak, variable specific 
barrier in Documentation/memor-barriers.txt:

  COMPILER BARRIER
  

  The Linux kernel has an explicit compiler barrier function that  prevents the
  compiler from moving the memory accesses either side of it to the  other side:

barrier();

  This is a general barrier -- there are no read-read or write-write variants
  of barrier().  However, ACCESS_ONCE() can be thought of as a weak form
  for barrier() that affects only the specific accesses flagged by the
  ACCESS_ONCE().

 [...]

 The 'read' side uses ACCESS_ONCE() for two purposes:
  - to load the value once, we don't want the seq number to change under
us for obvious reasons
  - to avoid load tearing and observe weird seq numbers
 
 The update side uses ACCESS_ONCE() to avoid write tearing, and 
 strictly speaking it should also worry about read-tearing since its 
 not hard serialized, although its very unlikely to actually have 
 concurrency (IIRC).

So what bad effects can there be from the very unlikely read and write 
tearing?

AFAICS nothing particularly bad. On the read side:

seq = ACCESS_ONCE(p-mm-numa_scan_seq);
if (p-numa_scan_seq == seq)
return;
p-numa_scan_seq = seq;

If p-mm-numa_scan_seq gets loaded twice (very unlikely), and two 
different values happen, then we might get a 'double' NUMA placement 
run - i.e. statistical noise.

On the update side:

ACCESS_ONCE(p-mm-numa_scan_seq)++;
p-mm-numa_scan_offset = 0;

If the compiler tears that up we might skip an update - again 
statistical noise at worst.

Nor is compiler tearing the only theoretical worry here: in theory, 
with long cache coherency latencies we might get two updates 'mixed 
up' and resulting in a (single) missed update.

Only atomics would solve all the races, but I think that would be 
overdoing it.

This is what I meant by that there's no harm from this race.

Thanks,

Ingo
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Peter Zijlstra
On Thu, Apr 16, 2015 at 08:24:27PM +0200, Ingo Molnar wrote:
 Yes ... but that still leaves this weird feeling that it's really 
 still a bit wrong because it's not proper parallel code, we just 
 reduced the probability of the remaining races radically. And it's not 
 like GCC (or any compiler) does load tearing or even store tearing 
 under normal -O2 for such code patterns, right?

I think Paul once caught GCC doing something silly, but typically no.
The re-loads however have been frequently observed.

  And its not like they really cost anything.
 
 That's true.
 
 Would it make sense to add a few comments to the seq field definition 
 site(s), about how it's supposed to be accessed - or to the 
 READ_ONCE()/WRITE_ONCE() sites, to keep people from wondering?

For sure, can do a comment no problem.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Jason Low
On Thu, 2015-04-16 at 20:15 +0200, Peter Zijlstra wrote:
 On Thu, Apr 16, 2015 at 08:02:27PM +0200, Ingo Molnar wrote:
   ACCESS_ONCE() is not a compiler barrier
  
  It's not a general compiler barrier (and I didn't claim so) but it is 
  still a compiler barrier: it's documented as a weak, variable specific 
  barrier in Documentation/memor-barriers.txt:
 
 Ok, fair enough. I just don't generally think of them as 'barriers'.
 
   The 'read' side uses ACCESS_ONCE() for two purposes:
- to load the value once, we don't want the seq number to change under
  us for obvious reasons
- to avoid load tearing and observe weird seq numbers
   
   The update side uses ACCESS_ONCE() to avoid write tearing, and 
   strictly speaking it should also worry about read-tearing since its 
   not hard serialized, although its very unlikely to actually have 
   concurrency (IIRC).
 
  This is what I meant by that there's no harm from this race.
 
 Ok, but I would still like to preserve the READ one on the usage site
 and the WRITE one on the update side, if only as documentation that
 there's something 'special' happening.

In that case, in our patch 2, I suppose we also want to use READ_ONCE()
when accessing the running field, which also helps document that we're
reading and writing to a non-atomic value that gets accessed without a
lock.

 And while the effects here might end up being statistical noise, I have
 conceptual problems with re-reading seq counts, that's not proper.
 
 And its not like they really cost anything.

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Paul E. McKenney
On Thu, Apr 16, 2015 at 09:02:08PM +0200, Peter Zijlstra wrote:
 On Thu, Apr 16, 2015 at 08:24:27PM +0200, Ingo Molnar wrote:
  Yes ... but that still leaves this weird feeling that it's really 
  still a bit wrong because it's not proper parallel code, we just 
  reduced the probability of the remaining races radically. And it's not 
  like GCC (or any compiler) does load tearing or even store tearing 
  under normal -O2 for such code patterns, right?
 
 I think Paul once caught GCC doing something silly, but typically no.
 The re-loads however have been frequently observed.

Too true!

Some architectures do split stores of constants.  For example, given
an architecture with a store-immediate instruction with (say) a four-bit 
immediate field, gcc can compile this:

x = 0x00020008;

to something like:

st $2, (x+2)
st $8, (x)

And gcc was doing this even though the store to x had volatile semantics,
a bug which has thankfully since been fixed.

But then again, I am paranoid.  So I would not put it past gcc to think
to itself Hmmm...  I just loaded x a few instructions back, and only
clobbered the low-order byte.  So I will just reload that byte into
low-order byte of the register containing the remnants of the previous
load.

No, I have never seen gcc do that, but a C compiler could do that and
still claim to be complying with the standard.  :-/

Thanx, Paul

   And its not like they really cost anything.
  
  That's true.
  
  Would it make sense to add a few comments to the seq field definition 
  site(s), about how it's supposed to be accessed - or to the 
  READ_ONCE()/WRITE_ONCE() sites, to keep people from wondering?
 
 For sure, can do a comment no problem.
 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Jason Low
On Thu, 2015-04-16 at 20:24 +0200, Ingo Molnar wrote:
 Would it make sense to add a few comments to the seq field definition 
 site(s), about how it's supposed to be accessed - or to the 
 READ_ONCE()/WRITE_ONCE() sites, to keep people from wondering?

How about this:

---
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5a44371..63fa87f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1794,6 +1794,11 @@ static void task_numa_placement(struct task_struct *p)
u64 runtime, period;
spinlock_t *group_lock = NULL;
 
+   /*
+* The p-mm-numa_scan_seq gets updated without
+* exclusive access. Use READ_ONCE() here to ensure
+* that the field is read in a single access.
+*/
seq = READ_ONCE(p-mm-numa_scan_seq);
if (p-numa_scan_seq == seq)
return;
@@ -2107,6 +2112,13 @@ void task_numa_fault(int last_cpupid, int mem_node, int 
pages, int flags)
 
 static void reset_ptenuma_scan(struct task_struct *p)
 {
+   /*
+* We only did a read acquisition of the mmap sem, so
+* p-mm-numa_scan_seq is written to without exclusive access.
+* That's not much of an issue though, since this is just used
+* for statistical sampling. Use WRITE_ONCE and READ_ONCE, which
+* are not expensive, to avoid load/store tearing.
+*/
WRITE_ONCE(p-mm-numa_scan_seq, READ_ONCE(p-mm-numa_scan_seq) + 1);
p-mm-numa_scan_offset = 0;
 }


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Peter Zijlstra
On Thu, Apr 16, 2015 at 08:02:27PM +0200, Ingo Molnar wrote:
  ACCESS_ONCE() is not a compiler barrier
 
 It's not a general compiler barrier (and I didn't claim so) but it is 
 still a compiler barrier: it's documented as a weak, variable specific 
 barrier in Documentation/memor-barriers.txt:

Ok, fair enough. I just don't generally think of them as 'barriers'.

  The 'read' side uses ACCESS_ONCE() for two purposes:
   - to load the value once, we don't want the seq number to change under
 us for obvious reasons
   - to avoid load tearing and observe weird seq numbers
  
  The update side uses ACCESS_ONCE() to avoid write tearing, and 
  strictly speaking it should also worry about read-tearing since its 
  not hard serialized, although its very unlikely to actually have 
  concurrency (IIRC).

 This is what I meant by that there's no harm from this race.

Ok, but I would still like to preserve the READ one on the usage site
and the WRITE one on the update side, if only as documentation that
there's something 'special' happening.

And while the effects here might end up being statistical noise, I have
conceptual problems with re-reading seq counts, that's not proper.

And its not like they really cost anything.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-16 Thread Ingo Molnar

* Peter Zijlstra pet...@infradead.org wrote:

 On Thu, Apr 16, 2015 at 08:02:27PM +0200, Ingo Molnar wrote:
   ACCESS_ONCE() is not a compiler barrier
  
  It's not a general compiler barrier (and I didn't claim so) but it is 
  still a compiler barrier: it's documented as a weak, variable specific 
  barrier in Documentation/memor-barriers.txt:
 
 Ok, fair enough. I just don't generally think of them as 'barriers'.
 
   The 'read' side uses ACCESS_ONCE() for two purposes:
- to load the value once, we don't want the seq number to change under
  us for obvious reasons
- to avoid load tearing and observe weird seq numbers
   
   The update side uses ACCESS_ONCE() to avoid write tearing, and 
   strictly speaking it should also worry about read-tearing since its 
   not hard serialized, although its very unlikely to actually have 
   concurrency (IIRC).
 
  This is what I meant by that there's no harm from this race.
 
 Ok, but I would still like to preserve the READ one on the usage 
 site and the WRITE one on the update side, if only as documentation 
 that there's something 'special' happening.
 
 And while the effects here might end up being statistical noise, I 
 have conceptual problems with re-reading seq counts, that's not 
 proper.

Yes ... but that still leaves this weird feeling that it's really 
still a bit wrong because it's not proper parallel code, we just 
reduced the probability of the remaining races radically. And it's not 
like GCC (or any compiler) does load tearing or even store tearing 
under normal -O2 for such code patterns, right?

 
 And its not like they really cost anything.

That's true.

Would it make sense to add a few comments to the seq field definition 
site(s), about how it's supposed to be accessed - or to the 
READ_ONCE()/WRITE_ONCE() sites, to keep people from wondering?

Thanks,

Ingo
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Jason Low
Hi Ingo,

On Wed, 2015-04-15 at 09:46 +0200, Ingo Molnar wrote:
> * Steven Rostedt  wrote:
> > You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
> > and just a:
> > 
> > p->mm->numa_scan_seq = READ_ONCE(p->numa_scan_seq) + 1;
> > 
> > Can be done. But I'm still trying to wrap my head around why this is
> > needed here. Comments would have been really helpful. We should make
> > all READ_ONCE() WRITE_ONCE and obsolete ACCESS_ONCE() have mandatory
> > comments just like we do with memory barriers.
> 
> So the original ACCESS_ONCE() barriers were misguided to begin with: I 
> think they tried to handle races with the scheduler balancing softirq 
> and tried to avoid having to use atomics for the sequence counter 
> (which would be overkill), but things like ACCESS_ONCE(x)++ never 
> guaranteed atomicity (or even coherency) of the update.
> 
> But since in reality this is only statistical sampling code, all these 
> compiler barriers can be removed I think. Peter, Mel, Rik, do you 
> agree?

Though in the read side for accessing things such as numa_scan_seq, we
still want to keep them in order to guarantee that numa_scan_seq is only
loaded once right?

static void task_numa_placement(struct task_struct *p)
{
...

seq = ACCESS_ONCE(p->mm->numa_scan_seq);
if (p->numa_scan_seq == seq)
return;
p->numa_scan_seq = seq;

...
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Steven Rostedt
On Wed, 15 Apr 2015 19:29:01 -0700
Jason Low  wrote:

> On Tue, 2015-04-14 at 22:40 -0400, Steven Rostedt wrote:
> > You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
> > and just a:
> > 
> > p->mm->numa_scan_seq = READ_ONCE(p->numa_scan_seq) + 1;
> 
> Just to confirm, is this a typo? Because there really is a numa_scan_seq
> in the task_struct itself too :)
> 

Oops, yeah that was a typo.

-- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Jason Low
On Tue, 2015-04-14 at 22:40 -0400, Steven Rostedt wrote:
> You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
> and just a:
> 
>   p->mm->numa_scan_seq = READ_ONCE(p->numa_scan_seq) + 1;

Just to confirm, is this a typo? Because there really is a numa_scan_seq
in the task_struct itself too :)

p->mm->numa_scan_seq is read in task_numa_placement() with
ACCESS_ONCE(), and so the benefit that I do see with it is that it makes
it consistent by doing the updates with ACCESS_ONCE too (for
documentation purposes).

If that's really the case:

WRITE_ONCE(p->mm->numa_scan_seq, p->mm->numa_scan_seq + 1)

should be enough for that.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Steven Rostedt
On Wed, 15 Apr 2015 11:49:09 -0700
Jason Low  wrote:

> So I'll keep the READ_ONCE nested inside WRITE_ONCE for the purpose of
> this patch since this patch is a conversion from ACCESS_ONCE, but yes,
> if the original purpose of ACCESS_ONCE was to do an atomic increment,
> then the ACCESS_ONCE doesn't help with that.

For the purpose of this patch, I think it's fine, as being more
paranoid is better than not being paranoid enough.

But this has shined light onto whether it is needed or not, and we
should figure that out in the not so far future.

-- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Jason Low
On Wed, 2015-04-15 at 09:46 +0200, Ingo Molnar wrote:
> * Steven Rostedt  wrote:

> > You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
> > and just a:
> > 
> > p->mm->numa_scan_seq = READ_ONCE(p->numa_scan_seq) + 1;
> > 
> > Can be done. But I'm still trying to wrap my head around why this is
> > needed here. Comments would have been really helpful. We should make
> > all READ_ONCE() WRITE_ONCE and obsolete ACCESS_ONCE() have mandatory
> > comments just like we do with memory barriers.
> 
> So the original ACCESS_ONCE() barriers were misguided to begin with: I 
> think they tried to handle races with the scheduler balancing softirq 
> and tried to avoid having to use atomics for the sequence counter 
> (which would be overkill), but things like ACCESS_ONCE(x)++ never 
> guaranteed atomicity (or even coherency) of the update.
> 
> But since in reality this is only statistical sampling code, all these 
> compiler barriers can be removed I think. Peter, Mel, Rik, do you 
> agree?

So I'll keep the READ_ONCE nested inside WRITE_ONCE for the purpose of
this patch since this patch is a conversion from ACCESS_ONCE, but yes,
if the original purpose of ACCESS_ONCE was to do an atomic increment,
then the ACCESS_ONCE doesn't help with that.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Ingo Molnar

* Steven Rostedt  wrote:

> On Tue, 14 Apr 2015 19:12:33 -0700
> Jason Low  wrote:
> 
> > Hi Steven,
> > 
> > On Tue, 2015-04-14 at 19:59 -0400, Steven Rostedt wrote:
> > > On Tue, 14 Apr 2015 16:09:44 -0700
> > > Jason Low  wrote:
> > > 
> > > 
> > > > @@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int 
> > > > mem_node, int pages, int flags)
> > > >  
> > > >  static void reset_ptenuma_scan(struct task_struct *p)
> > > >  {
> > > > -   ACCESS_ONCE(p->mm->numa_scan_seq)++;
> > > > +   WRITE_ONCE(p->mm->numa_scan_seq, 
> > > > READ_ONCE(p->mm->numa_scan_seq) + 1);
> > > 
> > > Is the READ_ONCE() inside the WRITE_ONCE() really necessary?
> > 
> > Yeah, I think so to be safe, otherwise, the access of
> > p->mm->numa_scan_seq in the 2nd parameter doesn't have the volatile
> > cast.
> 
> You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
> and just a:
> 
>   p->mm->numa_scan_seq = READ_ONCE(p->numa_scan_seq) + 1;
> 
> Can be done. But I'm still trying to wrap my head around why this is
> needed here. Comments would have been really helpful. We should make
> all READ_ONCE() WRITE_ONCE and obsolete ACCESS_ONCE() have mandatory
> comments just like we do with memory barriers.

So the original ACCESS_ONCE() barriers were misguided to begin with: I 
think they tried to handle races with the scheduler balancing softirq 
and tried to avoid having to use atomics for the sequence counter 
(which would be overkill), but things like ACCESS_ONCE(x)++ never 
guaranteed atomicity (or even coherency) of the update.

But since in reality this is only statistical sampling code, all these 
compiler barriers can be removed I think. Peter, Mel, Rik, do you 
agree?

Thanks,

Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Jason Low
On Wed, 2015-04-15 at 09:46 +0200, Ingo Molnar wrote:
 * Steven Rostedt rost...@goodmis.org wrote:

  You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
  and just a:
  
  p-mm-numa_scan_seq = READ_ONCE(p-numa_scan_seq) + 1;
  
  Can be done. But I'm still trying to wrap my head around why this is
  needed here. Comments would have been really helpful. We should make
  all READ_ONCE() WRITE_ONCE and obsolete ACCESS_ONCE() have mandatory
  comments just like we do with memory barriers.
 
 So the original ACCESS_ONCE() barriers were misguided to begin with: I 
 think they tried to handle races with the scheduler balancing softirq 
 and tried to avoid having to use atomics for the sequence counter 
 (which would be overkill), but things like ACCESS_ONCE(x)++ never 
 guaranteed atomicity (or even coherency) of the update.
 
 But since in reality this is only statistical sampling code, all these 
 compiler barriers can be removed I think. Peter, Mel, Rik, do you 
 agree?

So I'll keep the READ_ONCE nested inside WRITE_ONCE for the purpose of
this patch since this patch is a conversion from ACCESS_ONCE, but yes,
if the original purpose of ACCESS_ONCE was to do an atomic increment,
then the ACCESS_ONCE doesn't help with that.

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Steven Rostedt
On Wed, 15 Apr 2015 11:49:09 -0700
Jason Low jason.l...@hp.com wrote:

 So I'll keep the READ_ONCE nested inside WRITE_ONCE for the purpose of
 this patch since this patch is a conversion from ACCESS_ONCE, but yes,
 if the original purpose of ACCESS_ONCE was to do an atomic increment,
 then the ACCESS_ONCE doesn't help with that.

For the purpose of this patch, I think it's fine, as being more
paranoid is better than not being paranoid enough.

But this has shined light onto whether it is needed or not, and we
should figure that out in the not so far future.

-- Steve
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Jason Low
On Tue, 2015-04-14 at 22:40 -0400, Steven Rostedt wrote:
 You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
 and just a:
 
   p-mm-numa_scan_seq = READ_ONCE(p-numa_scan_seq) + 1;

Just to confirm, is this a typo? Because there really is a numa_scan_seq
in the task_struct itself too :)

p-mm-numa_scan_seq is read in task_numa_placement() with
ACCESS_ONCE(), and so the benefit that I do see with it is that it makes
it consistent by doing the updates with ACCESS_ONCE too (for
documentation purposes).

If that's really the case:

WRITE_ONCE(p-mm-numa_scan_seq, p-mm-numa_scan_seq + 1)

should be enough for that.

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Steven Rostedt
On Wed, 15 Apr 2015 19:29:01 -0700
Jason Low jason.l...@hp.com wrote:

 On Tue, 2015-04-14 at 22:40 -0400, Steven Rostedt wrote:
  You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
  and just a:
  
  p-mm-numa_scan_seq = READ_ONCE(p-numa_scan_seq) + 1;
 
 Just to confirm, is this a typo? Because there really is a numa_scan_seq
 in the task_struct itself too :)
 

Oops, yeah that was a typo.

-- Steve
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Jason Low
Hi Ingo,

On Wed, 2015-04-15 at 09:46 +0200, Ingo Molnar wrote:
 * Steven Rostedt rost...@goodmis.org wrote:
  You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
  and just a:
  
  p-mm-numa_scan_seq = READ_ONCE(p-numa_scan_seq) + 1;
  
  Can be done. But I'm still trying to wrap my head around why this is
  needed here. Comments would have been really helpful. We should make
  all READ_ONCE() WRITE_ONCE and obsolete ACCESS_ONCE() have mandatory
  comments just like we do with memory barriers.
 
 So the original ACCESS_ONCE() barriers were misguided to begin with: I 
 think they tried to handle races with the scheduler balancing softirq 
 and tried to avoid having to use atomics for the sequence counter 
 (which would be overkill), but things like ACCESS_ONCE(x)++ never 
 guaranteed atomicity (or even coherency) of the update.
 
 But since in reality this is only statistical sampling code, all these 
 compiler barriers can be removed I think. Peter, Mel, Rik, do you 
 agree?

Though in the read side for accessing things such as numa_scan_seq, we
still want to keep them in order to guarantee that numa_scan_seq is only
loaded once right?

static void task_numa_placement(struct task_struct *p)
{
...

seq = ACCESS_ONCE(p-mm-numa_scan_seq);
if (p-numa_scan_seq == seq)
return;
p-numa_scan_seq = seq;

...
}

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-15 Thread Ingo Molnar

* Steven Rostedt rost...@goodmis.org wrote:

 On Tue, 14 Apr 2015 19:12:33 -0700
 Jason Low jason.l...@hp.com wrote:
 
  Hi Steven,
  
  On Tue, 2015-04-14 at 19:59 -0400, Steven Rostedt wrote:
   On Tue, 14 Apr 2015 16:09:44 -0700
   Jason Low jason.l...@hp.com wrote:
   
   
@@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int 
mem_node, int pages, int flags)
 
 static void reset_ptenuma_scan(struct task_struct *p)
 {
-   ACCESS_ONCE(p-mm-numa_scan_seq)++;
+   WRITE_ONCE(p-mm-numa_scan_seq, 
READ_ONCE(p-mm-numa_scan_seq) + 1);
   
   Is the READ_ONCE() inside the WRITE_ONCE() really necessary?
  
  Yeah, I think so to be safe, otherwise, the access of
  p-mm-numa_scan_seq in the 2nd parameter doesn't have the volatile
  cast.
 
 You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
 and just a:
 
   p-mm-numa_scan_seq = READ_ONCE(p-numa_scan_seq) + 1;
 
 Can be done. But I'm still trying to wrap my head around why this is
 needed here. Comments would have been really helpful. We should make
 all READ_ONCE() WRITE_ONCE and obsolete ACCESS_ONCE() have mandatory
 comments just like we do with memory barriers.

So the original ACCESS_ONCE() barriers were misguided to begin with: I 
think they tried to handle races with the scheduler balancing softirq 
and tried to avoid having to use atomics for the sequence counter 
(which would be overkill), but things like ACCESS_ONCE(x)++ never 
guaranteed atomicity (or even coherency) of the update.

But since in reality this is only statistical sampling code, all these 
compiler barriers can be removed I think. Peter, Mel, Rik, do you 
agree?

Thanks,

Ingo
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-14 Thread Steven Rostedt
On Tue, 14 Apr 2015 19:12:33 -0700
Jason Low  wrote:

> Hi Steven,
> 
> On Tue, 2015-04-14 at 19:59 -0400, Steven Rostedt wrote:
> > On Tue, 14 Apr 2015 16:09:44 -0700
> > Jason Low  wrote:
> > 
> > 
> > > @@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int mem_node, 
> > > int pages, int flags)
> > >  
> > >  static void reset_ptenuma_scan(struct task_struct *p)
> > >  {
> > > - ACCESS_ONCE(p->mm->numa_scan_seq)++;
> > > + WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
> > 
> > Is the READ_ONCE() inside the WRITE_ONCE() really necessary?
> 
> Yeah, I think so to be safe, otherwise, the access of
> p->mm->numa_scan_seq in the 2nd parameter doesn't have the volatile
> cast.

You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
and just a:

p->mm->numa_scan_seq = READ_ONCE(p->numa_scan_seq) + 1;

Can be done. But I'm still trying to wrap my head around why this is
needed here. Comments would have been really helpful. We should make
all READ_ONCE() WRITE_ONCE and obsolete ACCESS_ONCE() have mandatory
comments just like we do with memory barriers.

-- Steve

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-14 Thread Jason Low
Hi Steven,

On Tue, 2015-04-14 at 19:59 -0400, Steven Rostedt wrote:
> On Tue, 14 Apr 2015 16:09:44 -0700
> Jason Low  wrote:
> 
> 
> > @@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int mem_node, 
> > int pages, int flags)
> >  
> >  static void reset_ptenuma_scan(struct task_struct *p)
> >  {
> > -   ACCESS_ONCE(p->mm->numa_scan_seq)++;
> > +   WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
> 
> Is the READ_ONCE() inside the WRITE_ONCE() really necessary?

Yeah, I think so to be safe, otherwise, the access of
p->mm->numa_scan_seq in the 2nd parameter doesn't have the volatile
cast.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-14 Thread Steven Rostedt
On Tue, 14 Apr 2015 16:09:44 -0700
Jason Low  wrote:


> @@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int 
> pages, int flags)
>  
>  static void reset_ptenuma_scan(struct task_struct *p)
>  {
> - ACCESS_ONCE(p->mm->numa_scan_seq)++;
> + WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);

Is the READ_ONCE() inside the WRITE_ONCE() really necessary?

>   p->mm->numa_scan_offset = 0;
>  }
>  

-- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-14 Thread Jason Low
ACCESS_ONCE doesn't work reliably on non-scalar types. This patch removes
the rest of the existing usages of ACCESS_ONCE in the scheduler, and use
the new READ_ONCE and WRITE_ONCE APIs.

Signed-off-by: Jason Low 
---
 include/linux/sched.h  |4 ++--
 kernel/fork.c  |2 +-
 kernel/sched/auto_group.c  |2 +-
 kernel/sched/auto_group.h  |2 +-
 kernel/sched/core.c|4 ++--
 kernel/sched/cputime.c |2 +-
 kernel/sched/deadline.c|2 +-
 kernel/sched/fair.c|   14 +++---
 kernel/sched/proc.c|4 ++--
 kernel/sched/rt.c  |2 +-
 kernel/sched/sched.h   |2 +-
 kernel/sched/wait.c|4 ++--
 kernel/time/posix-cpu-timers.c |8 
 13 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d77432..379fb3b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3070,13 +3070,13 @@ static inline void mm_update_next_owner(struct 
mm_struct *mm)
 static inline unsigned long task_rlimit(const struct task_struct *tsk,
unsigned int limit)
 {
-   return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
+   return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
 }
 
 static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
unsigned int limit)
 {
-   return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max);
+   return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
 }
 
 static inline unsigned long rlimit(unsigned int limit)
diff --git a/kernel/fork.c b/kernel/fork.c
index cf65139..d96a0ca 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1045,7 +1045,7 @@ static void posix_cpu_timers_init_group(struct 
signal_struct *sig)
/* Thread group counters. */
thread_group_cputime_init(sig);
 
-   cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+   cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
if (cpu_limit != RLIM_INFINITY) {
sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
sig->cputimer.running = 1;
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index eae160d..a8653c2 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -141,7 +141,7 @@ autogroup_move_group(struct task_struct *p, struct 
autogroup *ag)
 
p->signal->autogroup = autogroup_kref_get(ag);
 
-   if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
+   if (!READ_ONCE(sysctl_sched_autogroup_enabled))
goto out;
 
for_each_thread(p, t)
diff --git a/kernel/sched/auto_group.h b/kernel/sched/auto_group.h
index 8bd0471..890c95f 100644
--- a/kernel/sched/auto_group.h
+++ b/kernel/sched/auto_group.h
@@ -29,7 +29,7 @@ extern bool task_wants_autogroup(struct task_struct *p, 
struct task_group *tg);
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg)
 {
-   int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+   int enabled = READ_ONCE(sysctl_sched_autogroup_enabled);
 
if (enabled && task_wants_autogroup(p, tg))
return p->signal->autogroup->tg;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f0f831e..4131c24 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -508,7 +508,7 @@ static bool set_nr_and_not_polling(struct task_struct *p)
 static bool set_nr_if_polling(struct task_struct *p)
 {
struct thread_info *ti = task_thread_info(p);
-   typeof(ti->flags) old, val = ACCESS_ONCE(ti->flags);
+   typeof(ti->flags) old, val = READ_ONCE(ti->flags);
 
for (;;) {
if (!(val & _TIF_POLLING_NRFLAG))
@@ -2505,7 +2505,7 @@ void scheduler_tick(void)
 u64 scheduler_tick_max_deferment(void)
 {
struct rq *rq = this_rq();
-   unsigned long next, now = ACCESS_ONCE(jiffies);
+   unsigned long next, now = READ_ONCE(jiffies);
 
next = rq->last_sched_tick + HZ;
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8394b1e..f5a64ff 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -567,7 +567,7 @@ static void cputime_advance(cputime_t *counter, cputime_t 
new)
 {
cputime_t old;
 
-   while (new > (old = ACCESS_ONCE(*counter)))
+   while (new > (old = READ_ONCE(*counter)))
cmpxchg_cputime(counter, old, new);
 }
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3fa8fa6..fa43e3f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -932,7 +932,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int 
sd_flag, int flags)
rq = cpu_rq(cpu);
 
rcu_read_lock();
-   curr = ACCESS_ONCE(rq->curr); /* unlocked access */
+   curr = READ_ONCE(rq->curr); /* unlocked access */
 
/*
 * If we are dealing with a -deadline task, we must
diff --git 

Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-14 Thread Steven Rostedt
On Tue, 14 Apr 2015 16:09:44 -0700
Jason Low jason.l...@hp.com wrote:


 @@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int 
 pages, int flags)
  
  static void reset_ptenuma_scan(struct task_struct *p)
  {
 - ACCESS_ONCE(p-mm-numa_scan_seq)++;
 + WRITE_ONCE(p-mm-numa_scan_seq, READ_ONCE(p-mm-numa_scan_seq) + 1);

Is the READ_ONCE() inside the WRITE_ONCE() really necessary?

   p-mm-numa_scan_offset = 0;
  }
  

-- Steve
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-14 Thread Jason Low
Hi Steven,

On Tue, 2015-04-14 at 19:59 -0400, Steven Rostedt wrote:
 On Tue, 14 Apr 2015 16:09:44 -0700
 Jason Low jason.l...@hp.com wrote:
 
 
  @@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int mem_node, 
  int pages, int flags)
   
   static void reset_ptenuma_scan(struct task_struct *p)
   {
  -   ACCESS_ONCE(p-mm-numa_scan_seq)++;
  +   WRITE_ONCE(p-mm-numa_scan_seq, READ_ONCE(p-mm-numa_scan_seq) + 1);
 
 Is the READ_ONCE() inside the WRITE_ONCE() really necessary?

Yeah, I think so to be safe, otherwise, the access of
p-mm-numa_scan_seq in the 2nd parameter doesn't have the volatile
cast.

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-14 Thread Steven Rostedt
On Tue, 14 Apr 2015 19:12:33 -0700
Jason Low jason.l...@hp.com wrote:

 Hi Steven,
 
 On Tue, 2015-04-14 at 19:59 -0400, Steven Rostedt wrote:
  On Tue, 14 Apr 2015 16:09:44 -0700
  Jason Low jason.l...@hp.com wrote:
  
  
   @@ -2088,7 +2088,7 @@ void task_numa_fault(int last_cpupid, int mem_node, 
   int pages, int flags)

static void reset_ptenuma_scan(struct task_struct *p)
{
   - ACCESS_ONCE(p-mm-numa_scan_seq)++;
   + WRITE_ONCE(p-mm-numa_scan_seq, READ_ONCE(p-mm-numa_scan_seq) + 1);
  
  Is the READ_ONCE() inside the WRITE_ONCE() really necessary?
 
 Yeah, I think so to be safe, otherwise, the access of
 p-mm-numa_scan_seq in the 2nd parameter doesn't have the volatile
 cast.

You are correct. Now I'm thinking that the WRITE_ONCE() is not needed,
and just a:

p-mm-numa_scan_seq = READ_ONCE(p-numa_scan_seq) + 1;

Can be done. But I'm still trying to wrap my head around why this is
needed here. Comments would have been really helpful. We should make
all READ_ONCE() WRITE_ONCE and obsolete ACCESS_ONCE() have mandatory
comments just like we do with memory barriers.

-- Steve

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] sched, timer: Remove usages of ACCESS_ONCE in the scheduler

2015-04-14 Thread Jason Low
ACCESS_ONCE doesn't work reliably on non-scalar types. This patch removes
the rest of the existing usages of ACCESS_ONCE in the scheduler, and use
the new READ_ONCE and WRITE_ONCE APIs.

Signed-off-by: Jason Low jason.l...@hp.com
---
 include/linux/sched.h  |4 ++--
 kernel/fork.c  |2 +-
 kernel/sched/auto_group.c  |2 +-
 kernel/sched/auto_group.h  |2 +-
 kernel/sched/core.c|4 ++--
 kernel/sched/cputime.c |2 +-
 kernel/sched/deadline.c|2 +-
 kernel/sched/fair.c|   14 +++---
 kernel/sched/proc.c|4 ++--
 kernel/sched/rt.c  |2 +-
 kernel/sched/sched.h   |2 +-
 kernel/sched/wait.c|4 ++--
 kernel/time/posix-cpu-timers.c |8 
 13 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d77432..379fb3b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3070,13 +3070,13 @@ static inline void mm_update_next_owner(struct 
mm_struct *mm)
 static inline unsigned long task_rlimit(const struct task_struct *tsk,
unsigned int limit)
 {
-   return ACCESS_ONCE(tsk-signal-rlim[limit].rlim_cur);
+   return READ_ONCE(tsk-signal-rlim[limit].rlim_cur);
 }
 
 static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
unsigned int limit)
 {
-   return ACCESS_ONCE(tsk-signal-rlim[limit].rlim_max);
+   return READ_ONCE(tsk-signal-rlim[limit].rlim_max);
 }
 
 static inline unsigned long rlimit(unsigned int limit)
diff --git a/kernel/fork.c b/kernel/fork.c
index cf65139..d96a0ca 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1045,7 +1045,7 @@ static void posix_cpu_timers_init_group(struct 
signal_struct *sig)
/* Thread group counters. */
thread_group_cputime_init(sig);
 
-   cpu_limit = ACCESS_ONCE(sig-rlim[RLIMIT_CPU].rlim_cur);
+   cpu_limit = READ_ONCE(sig-rlim[RLIMIT_CPU].rlim_cur);
if (cpu_limit != RLIM_INFINITY) {
sig-cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
sig-cputimer.running = 1;
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index eae160d..a8653c2 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -141,7 +141,7 @@ autogroup_move_group(struct task_struct *p, struct 
autogroup *ag)
 
p-signal-autogroup = autogroup_kref_get(ag);
 
-   if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
+   if (!READ_ONCE(sysctl_sched_autogroup_enabled))
goto out;
 
for_each_thread(p, t)
diff --git a/kernel/sched/auto_group.h b/kernel/sched/auto_group.h
index 8bd0471..890c95f 100644
--- a/kernel/sched/auto_group.h
+++ b/kernel/sched/auto_group.h
@@ -29,7 +29,7 @@ extern bool task_wants_autogroup(struct task_struct *p, 
struct task_group *tg);
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg)
 {
-   int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+   int enabled = READ_ONCE(sysctl_sched_autogroup_enabled);
 
if (enabled  task_wants_autogroup(p, tg))
return p-signal-autogroup-tg;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f0f831e..4131c24 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -508,7 +508,7 @@ static bool set_nr_and_not_polling(struct task_struct *p)
 static bool set_nr_if_polling(struct task_struct *p)
 {
struct thread_info *ti = task_thread_info(p);
-   typeof(ti-flags) old, val = ACCESS_ONCE(ti-flags);
+   typeof(ti-flags) old, val = READ_ONCE(ti-flags);
 
for (;;) {
if (!(val  _TIF_POLLING_NRFLAG))
@@ -2505,7 +2505,7 @@ void scheduler_tick(void)
 u64 scheduler_tick_max_deferment(void)
 {
struct rq *rq = this_rq();
-   unsigned long next, now = ACCESS_ONCE(jiffies);
+   unsigned long next, now = READ_ONCE(jiffies);
 
next = rq-last_sched_tick + HZ;
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8394b1e..f5a64ff 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -567,7 +567,7 @@ static void cputime_advance(cputime_t *counter, cputime_t 
new)
 {
cputime_t old;
 
-   while (new  (old = ACCESS_ONCE(*counter)))
+   while (new  (old = READ_ONCE(*counter)))
cmpxchg_cputime(counter, old, new);
 }
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3fa8fa6..fa43e3f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -932,7 +932,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int 
sd_flag, int flags)
rq = cpu_rq(cpu);
 
rcu_read_lock();
-   curr = ACCESS_ONCE(rq-curr); /* unlocked access */
+   curr = READ_ONCE(rq-curr); /* unlocked access */
 
/*
 * If we are dealing with a -deadline task, we must
diff --git a/kernel/sched/fair.c