Re: [PATCH] x86/fpu: move FPU state into separate cache

2017-03-30 Thread kbuild test robot
Hi Kees,

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v4.11-rc4 next-20170330]
[cannot apply to tip/x86/core]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Kees-Cook/x86-fpu-move-FPU-state-into-separate-cache/20170331-110507
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: i386-randconfig-s0-201713 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All error/warnings (new ones prefixed by >>):

   In file included from arch/x86/include/asm/cpufeature.h:4:0,
from arch/x86/include/asm/thread_info.h:52,
from include/linux/thread_info.h:25,
from arch/x86/include/asm/preempt.h:6,
from include/linux/preempt.h:80,
from include/linux/spinlock.h:50,
from include/linux/rcupdate.h:38,
from include/linux/init_task.h:4,
from init/init_task.c:1:
>> arch/x86/include/asm/processor.h:805:17: error: 'init_fpregs_state' 
>> undeclared here (not in a function)
 .fpu.state  = &init_fpregs_state, \
^
>> include/linux/init_task.h:255:13: note: in expansion of macro 'INIT_THREAD'
 .thread  = INIT_THREAD, \
^~~
>> init/init_task.c:19:32: note: in expansion of macro 'INIT_TASK'
struct task_struct init_task = INIT_TASK(init_task);
   ^
--
   In file included from arch/x86/math-emu/fpu_entry.c:35:0:
   arch/x86/math-emu/status_w.h: In function 'setcc':
>> arch/x86/math-emu/fpu_system.h:71:30: error: 
>> 'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define partial_status  (I387->soft.swd)
 ^
 ->
>> arch/x86/math-emu/status_w.h:53:2: note: in expansion of macro 
>> 'partial_status'
 partial_status &= ~(SW_C0 | SW_C1 | SW_C2 | SW_C3);
 ^~
>> arch/x86/math-emu/fpu_system.h:71:30: error: 
>> 'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define partial_status  (I387->soft.swd)
 ^
 ->
   arch/x86/math-emu/status_w.h:54:2: note: in expansion of macro 
'partial_status'
 partial_status |= (cc) & (SW_C0 | SW_C1 | SW_C2 | SW_C3);
 ^~
   arch/x86/math-emu/fpu_entry.c: In function 'math_emulate':
   arch/x86/math-emu/fpu_system.h:50:24: error: 
'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define FPU_info  (I387->soft.info)
   ^
 ->
>> arch/x86/math-emu/fpu_entry.c:126:2: note: in expansion of macro 'FPU_info'
 FPU_info = info;
 ^~~~
   arch/x86/math-emu/fpu_system.h:50:24: error: 
'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define FPU_info  (I387->soft.info)
   ^
 ->
>> arch/x86/math-emu/fpu_system.h:58:24: note: in expansion of macro 'FPU_info'
#define FPU_ORIG_EIP  (FPU_info->___orig_eip)
   ^~~~
>> arch/x86/math-emu/fpu_entry.c:128:2: note: in expansion of macro 
>> 'FPU_ORIG_EIP'
 FPU_ORIG_EIP = FPU_EIP;
 ^~~~
   arch/x86/math-emu/fpu_system.h:50:24: error: 
'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define FPU_info  (I387->soft.info)
   ^
->
   arch/x86/math-emu/fpu_system.h:57:20: note: in expansion of macro 'FPU_info'
#define FPU_EIP   (FPU_info->regs->ip)
   ^~~~
>> arch/x86/math-emu/fpu_entry.c:128:17: note: in expansion of macro 'FPU_EIP'
 FPU_ORIG_EIP = FPU_EIP;
^~~
   arch/x86/math-emu/fpu_system.h:50:24: error: 
'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define FPU_info  (I387->soft.info)
   ^
  ->
   arch/x86/math-emu/fpu_system.h:56:22: note: in expansion of macro 'FPU_info'
#define FPU_EFLAGS  (FPU_info->regs->flags)
 ^~~~
>> arch/x86/math-emu/fpu_entry.c:130:7: note: in expansion of macro 'FPU_EFLAGS'
 if ((FPU_EFLAGS & 0x0002) != 0) {
  ^~
   arch/x86/math-emu/fpu_system.h:50:24: error: 
'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define FPU_info  (I387->soft.info)
   ^
  ->
   arch/x86/math-emu/fpu_system.h:57:20: note: in expansion of macro 'FPU_info'
#define FPU_EIP   (FPU_info->regs->ip)
   ^

Re: [PATCH] x86/fpu: move FPU state into separate cache

2017-03-30 Thread kbuild test robot
Hi Kees,

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v4.11-rc4 next-20170330]
[cannot apply to tip/x86/core]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Kees-Cook/x86-fpu-move-FPU-state-into-separate-cache/20170331-110507
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: i386-randconfig-s0-201713 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All error/warnings (new ones prefixed by >>):

   In file included from arch/x86/include/asm/cpufeature.h:4:0,
from arch/x86/include/asm/thread_info.h:52,
from include/linux/thread_info.h:25,
from arch/x86/include/asm/preempt.h:6,
from include/linux/preempt.h:80,
from include/linux/spinlock.h:50,
from include/linux/rcupdate.h:38,
from include/linux/init_task.h:4,
from init/init_task.c:1:
>> arch/x86/include/asm/processor.h:805:17: error: 'init_fpregs_state' 
>> undeclared here (not in a function)
 .fpu.state  = &init_fpregs_state, \
^
>> include/linux/init_task.h:255:13: note: in expansion of macro 'INIT_THREAD'
 .thread  = INIT_THREAD, \
^~~
>> init/init_task.c:19:32: note: in expansion of macro 'INIT_TASK'
struct task_struct init_task = INIT_TASK(init_task);
   ^
--
   In file included from arch/x86/math-emu/fpu_entry.c:35:0:
   arch/x86/math-emu/status_w.h: In function 'setcc':
>> arch/x86/math-emu/fpu_system.h:71:30: error: 
>> 'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define partial_status  (I387->soft.swd)
 ^
 ->
>> arch/x86/math-emu/status_w.h:53:2: note: in expansion of macro 
>> 'partial_status'
 partial_status &= ~(SW_C0 | SW_C1 | SW_C2 | SW_C3);
 ^~
>> arch/x86/math-emu/fpu_system.h:71:30: error: 
>> 'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define partial_status  (I387->soft.swd)
 ^
 ->
   arch/x86/math-emu/status_w.h:54:2: note: in expansion of macro 
'partial_status'
 partial_status |= (cc) & (SW_C0 | SW_C1 | SW_C2 | SW_C3);
 ^~
   arch/x86/math-emu/fpu_entry.c: In function 'math_emulate':
   arch/x86/math-emu/fpu_system.h:50:24: error: 
'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define FPU_info  (I387->soft.info)
   ^
 ->
>> arch/x86/math-emu/fpu_entry.c:126:2: note: in expansion of macro 'FPU_info'
 FPU_info = info;
 ^~~~
   arch/x86/math-emu/fpu_system.h:50:24: error: 
'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define FPU_info  (I387->soft.info)
   ^
 ->
>> arch/x86/math-emu/fpu_system.h:58:24: note: in expansion of macro 'FPU_info'
#define FPU_ORIG_EIP  (FPU_info->___orig_eip)
   ^~~~
>> arch/x86/math-emu/fpu_entry.c:128:2: note: in expansion of macro 
>> 'FPU_ORIG_EIP'
 FPU_ORIG_EIP = FPU_EIP;
 ^~~~
   arch/x86/math-emu/fpu_system.h:50:24: error: 
'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define FPU_info  (I387->soft.info)
   ^
->
   arch/x86/math-emu/fpu_system.h:57:20: note: in expansion of macro 'FPU_info'
#define FPU_EIP   (FPU_info->regs->ip)
   ^~~~
>> arch/x86/math-emu/fpu_entry.c:128:17: note: in expansion of macro 'FPU_EIP'
 FPU_ORIG_EIP = FPU_EIP;
^~~
   arch/x86/math-emu/fpu_system.h:50:24: error: 
'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define FPU_info  (I387->soft.info)
   ^
  ->
   arch/x86/math-emu/fpu_system.h:56:22: note: in expansion of macro 'FPU_info'
#define FPU_EFLAGS  (FPU_info->regs->flags)
 ^~~~
>> arch/x86/math-emu/fpu_entry.c:130:7: note: in expansion of macro 'FPU_EFLAGS'
 if ((FPU_EFLAGS & 0x0002) != 0) {
  ^~
   arch/x86/math-emu/fpu_system.h:50:24: error: 
'get_current()->thread.fpu.state' is a pointer; did you mean to use '->'?
#define FPU_info  (I387->soft.info)
   ^
  ->
   arch/x86/math-emu/fpu_system.h:57:20: note: in expansion of macro 'FPU_info'
#define FPU_EIP   (FPU_info->regs->ip)
   ^

linux-next: manual merge of the akpm tree with the tip tree

2017-03-30 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the akpm tree got a conflict in:

  kernel/sched/fair.c

between commit:

  0ccb977f4c80 ("sched/fair: Explicitly generate __update_load_avg() instances")

from the tip tree and patch:

   "kernel/sched/fair.c: uninline __update_load_avg()"

from the akpm tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc kernel/sched/fair.c
index 359dbc05a3b4,28a2bd8bfb67..
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@@ -2929,11 -2848,14 +2929,11 @@@ accumulate_sum(u64 delta, int cpu, stru
   *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
   *= u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
   */
- static __always_inline int
+ static int
 -__update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 +___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
  unsigned long weight, int running, struct cfs_rq *cfs_rq)
  {
 -  u64 delta, scaled_delta, periods;
 -  u32 contrib;
 -  unsigned int delta_w, scaled_delta_w, decayed = 0;
 -  unsigned long scale_freq, scale_cpu;
 +  u64 delta;
  
delta = now - sa->last_update_time;
/*


linux-next: manual merge of the akpm tree with the tip tree

2017-03-30 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the akpm tree got a conflict in:

  kernel/sched/fair.c

between commit:

  0ccb977f4c80 ("sched/fair: Explicitly generate __update_load_avg() instances")

from the tip tree and patch:

   "kernel/sched/fair.c: uninline __update_load_avg()"

from the akpm tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc kernel/sched/fair.c
index 359dbc05a3b4,28a2bd8bfb67..
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@@ -2929,11 -2848,14 +2929,11 @@@ accumulate_sum(u64 delta, int cpu, stru
   *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
   *= u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
   */
- static __always_inline int
+ static int
 -__update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 +___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
  unsigned long weight, int running, struct cfs_rq *cfs_rq)
  {
 -  u64 delta, scaled_delta, periods;
 -  u32 contrib;
 -  unsigned int delta_w, scaled_delta_w, decayed = 0;
 -  unsigned long scale_freq, scale_cpu;
 +  u64 delta;
  
delta = now - sa->last_update_time;
/*


Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!

2017-03-30 Thread Tommi Rantala

On 30.03.2017 20:44, Laura Abbott wrote:

On 03/30/2017 10:37 AM, Kees Cook wrote:


Reads out of /dev/mem should be restricted to non-RAM on Fedora, yes?

Tommi, do your kernels have CONFIG_STRICT_DEVMEM=y ?

-Kees



CONFIG_STRICT_DEVMEM should be on in all Fedora kernels.


Yes, the fedora kernels do have it enabled:

  $ grep STRICT_DEVMEM /boot/config-4.9.14-200.fc25.x86_64
  CONFIG_STRICT_DEVMEM=y
  CONFIG_IO_STRICT_DEVMEM=y

But I do not have it in my own build:

  $ grep STRICT_DEVMEM .config
  # CONFIG_STRICT_DEVMEM is not set

-Tommi


Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!

2017-03-30 Thread Tommi Rantala

On 30.03.2017 20:44, Laura Abbott wrote:

On 03/30/2017 10:37 AM, Kees Cook wrote:


Reads out of /dev/mem should be restricted to non-RAM on Fedora, yes?

Tommi, do your kernels have CONFIG_STRICT_DEVMEM=y ?

-Kees



CONFIG_STRICT_DEVMEM should be on in all Fedora kernels.


Yes, the fedora kernels do have it enabled:

  $ grep STRICT_DEVMEM /boot/config-4.9.14-200.fc25.x86_64
  CONFIG_STRICT_DEVMEM=y
  CONFIG_IO_STRICT_DEVMEM=y

But I do not have it in my own build:

  $ grep STRICT_DEVMEM .config
  # CONFIG_STRICT_DEVMEM is not set

-Tommi


Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!

2017-03-30 Thread Tommi Rantala



On 30.03.2017 23:01, Dave Jones wrote:

On Thu, Mar 30, 2017 at 12:52:31PM -0700, Kees Cook wrote:
 > On Thu, Mar 30, 2017 at 12:41 PM, Dave Jones  wrote:
 > > On Thu, Mar 30, 2017 at 09:45:26AM -0700, Kees Cook wrote:
 > >  > On Wed, Mar 29, 2017 at 11:44 PM, Tommi Rantala
 > >  >  wrote:
 > >  > > Hi,
 > >  > >
 > >  > > Running:
 > >  > >
 > >  > >   $ sudo x86info -a
 > >  > >
 > >  > > On this HP ZBook 15 G3 laptop kills the x86info process with segfault and
 > >  > > produces the following kernel BUG.
 > >  > >
 > >  > >   $ git describe
 > >  > >   v4.11-rc4-40-gfe82203
 > >  > >
 > >  > > It is also reproducible with the fedora kernel: 4.9.14-200.fc25.x86_64
 > >  > >
 > >  > > Full dmesg output here: https://pastebin.com/raw/Kur2mpZq
 > >  > >
 > >  > > [   51.418954] usercopy: kernel memory exposure attempt detected from
 > >  > > 8809 (dma-kmalloc-256) (4096 bytes)
 > >  >
 > >  > This seems like a real exposure: the copy is attempting to read 4096
 > >  > bytes from a 256 byte object.
 > >
 > > The code[1] is doing a 4k read from /dev/mem in the range 0x9 -> 
0xa
 > > According to arch/x86/mm/init.c:devmem_is_allowed, that's still valid..
 > >
 > > Note that the printk is using the direct mapping address. Is that what's
 > > being passed down to devmem_is_allowed now ? If so, that's probably what 
broke.
 >
 > So this is attempting to read physical memory 0x9 -> 0xa, but
 > that's somehow resolving to a virtual address that is claimed by
 > dma-kmalloc?? I'm confused how that's happening...

The only thing that I can think of would be a rogue ptr in the bios
table, but that seems unlikely.  Tommi, can you put strace of x86info -mp somewhere?
That will confirm/deny whether we're at least asking the kernel to do sane things.


Indeed the bug happens when reading from /dev/mem:

https://pastebin.com/raw/ZEJGQP1X

# strace -f -y x86info -mp
[...]
open("/dev/mem", O_RDONLY)  = 3
lseek(3, 1038, SEEK_SET)  = 1038
read(3, "\300\235", 2)= 2
lseek(3, 646144, SEEK_SET)= 646144
read(3, 
"\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 1043, SEEK_SET)  = 1043
read(3, "w\2", 2) = 2
lseek(3, 645120, SEEK_SET)= 645120
read(3, 
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 654336, SEEK_SET)= 654336
read(3, 
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 983040, SEEK_SET)= 983040
read(3, 
"IFE$\245S\0\0\1\0\0\0\0\360y\0\0\360\220\260\30\237{=\23\10\17\\276\17\0"..., 
65536) = 65536

lseek(3, 917504, SEEK_SET)= 917504
read(3, 
"\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377"..., 
65536) = 65536

lseek(3, 524288, SEEK_SET)= 524288
read(3,  )= ?
+++ killed by SIGSEGV +++


Re: sudo x86info -a => kernel BUG at mm/usercopy.c:78!

2017-03-30 Thread Tommi Rantala



On 30.03.2017 23:01, Dave Jones wrote:

On Thu, Mar 30, 2017 at 12:52:31PM -0700, Kees Cook wrote:
 > On Thu, Mar 30, 2017 at 12:41 PM, Dave Jones  wrote:
 > > On Thu, Mar 30, 2017 at 09:45:26AM -0700, Kees Cook wrote:
 > >  > On Wed, Mar 29, 2017 at 11:44 PM, Tommi Rantala
 > >  >  wrote:
 > >  > > Hi,
 > >  > >
 > >  > > Running:
 > >  > >
 > >  > >   $ sudo x86info -a
 > >  > >
 > >  > > On this HP ZBook 15 G3 laptop kills the x86info process with segfault and
 > >  > > produces the following kernel BUG.
 > >  > >
 > >  > >   $ git describe
 > >  > >   v4.11-rc4-40-gfe82203
 > >  > >
 > >  > > It is also reproducible with the fedora kernel: 4.9.14-200.fc25.x86_64
 > >  > >
 > >  > > Full dmesg output here: https://pastebin.com/raw/Kur2mpZq
 > >  > >
 > >  > > [   51.418954] usercopy: kernel memory exposure attempt detected from
 > >  > > 8809 (dma-kmalloc-256) (4096 bytes)
 > >  >
 > >  > This seems like a real exposure: the copy is attempting to read 4096
 > >  > bytes from a 256 byte object.
 > >
 > > The code[1] is doing a 4k read from /dev/mem in the range 0x9 -> 
0xa
 > > According to arch/x86/mm/init.c:devmem_is_allowed, that's still valid..
 > >
 > > Note that the printk is using the direct mapping address. Is that what's
 > > being passed down to devmem_is_allowed now ? If so, that's probably what 
broke.
 >
 > So this is attempting to read physical memory 0x9 -> 0xa, but
 > that's somehow resolving to a virtual address that is claimed by
 > dma-kmalloc?? I'm confused how that's happening...

The only thing that I can think of would be a rogue ptr in the bios
table, but that seems unlikely.  Tommi, can you put strace of x86info -mp somewhere?
That will confirm/deny whether we're at least asking the kernel to do sane things.


Indeed the bug happens when reading from /dev/mem:

https://pastebin.com/raw/ZEJGQP1X

# strace -f -y x86info -mp
[...]
open("/dev/mem", O_RDONLY)  = 3
lseek(3, 1038, SEEK_SET)  = 1038
read(3, "\300\235", 2)= 2
lseek(3, 646144, SEEK_SET)= 646144
read(3, 
"\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 1043, SEEK_SET)  = 1043
read(3, "w\2", 2) = 2
lseek(3, 645120, SEEK_SET)= 645120
read(3, 
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 654336, SEEK_SET)= 654336
read(3, 
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
1024) = 1024

lseek(3, 983040, SEEK_SET)= 983040
read(3, 
"IFE$\245S\0\0\1\0\0\0\0\360y\0\0\360\220\260\30\237{=\23\10\17\\276\17\0"..., 
65536) = 65536

lseek(3, 917504, SEEK_SET)= 917504
read(3, 
"\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377"..., 
65536) = 65536

lseek(3, 524288, SEEK_SET)= 524288
read(3,  )= ?
+++ killed by SIGSEGV +++


Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

2017-03-30 Thread Bjorn Helgaas
On Thu, Mar 30, 2017 at 05:26:09PM -0700, Brian Norris wrote:
> Hi Bjorn,
> 
> On Thu, Mar 30, 2017 at 06:28:25PM -0500, Bjorn Helgaas wrote:
> > On Fri, Mar 24, 2017 at 10:22:19AM -0700, Brian Norris wrote:
> > > On Fri, Mar 24, 2017 at 09:25:41AM -0500, Bjorn Helgaas wrote:
> > > > These don't have .remove:
> > > > 
> > > >   imx6_pcie_driver
> > > >   ls_pcie_driver
> > > >   armada8k_pcie_driver
> > > >   artpec6_pcie_driver
> > > >   dw_plat_pcie_driver
> > > >   hisi_pcie_driver
> > > >   hisi_pcie_almost_ecam_driver
> > > >   spear13xx_pcie_driver
> > > >   gen_pci_driver
> > > 
> > > I think these are all technically broken.
> > 
> > Can we fix them all at the same time as you fix Rockchip?  Maybe we
> > should have a series that adds ".suppress_bind_attrs = true" to all
> > these drivers,
> 
> Sure, I can do that.
> 
> > including Rockchip.
> 
> Huh? Why? So I can revert that in the next patch?
> 
> > Then you could have this current 
> > series to make Rockchip modular on top, if there's still value in it.
> 
> I do see value in it. That's the whole reason I wrote this patchset.
> It's useful for stressing out certain behaviors that will happen all the
> time (i.e., boot-time initialization, from platform probe, to bus init,
> to client/EP init), via repeated bind/unbind (or modprobe/rmmod). It's
> much faster than reboot testing.

I didn't phrase that very well.  There's certainly value in stressing
the bind/unbind paths, but I thought the primary reason you wrote this
was to fix the fact that you could crash the system like this:

  # echo f800.pcie > /sys/bus/platform/drivers/rockchip-pcie/unbind
  # lspci

From my point of view, that's the issue that *has* to be fixed.
Better test coverage is icing.

It sounds like several drivers have that same issue, and the simplest
possible fix is to set .suppress_bind_attrs, so I suggested doing that 
so it's easy to analyze the tree as a whole and say "these drivers
all have the same problem, and all the fixes look the same."

I guess if you'd rather skip that for Rockchip and apply a more
complicated fix there, I could go along with that.  But I don't think
it would hurt anything to set .suppress_bind_attrs, then remove it
when you add module support.  The concepts of .suppress_bind_attrs and
modularity are related, and doing this in a separate patch would make
it a nice example to follow if somebody wants to make other drivers
modular as well.

> Personally, I'd rather just patch the other drivers, and you can wait
> until I follow through on that promise before applying my existing work
> for the Rockchip driver, if that's what you'd prefer.

It's not so much a question of using the Rockchip change as a stick.
I'm just thinking that it makes a more logical progression to fix the
more important issue globally first.

> > If we find a common problem, I'd like to fix it everywhere we know
> > about so it doesn't get forgotten or copied to even more places.
> 
> Sure. But you only just pointed out how broken several drivers were; I
> didn't really notice :)

Yeah, you're right, I had in my head the idea that if we've identified
the same problem in several drivers, we should fix them all, but I
neglected to turn that into words.

Bjorn


Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

2017-03-30 Thread Bjorn Helgaas
On Thu, Mar 30, 2017 at 05:26:09PM -0700, Brian Norris wrote:
> Hi Bjorn,
> 
> On Thu, Mar 30, 2017 at 06:28:25PM -0500, Bjorn Helgaas wrote:
> > On Fri, Mar 24, 2017 at 10:22:19AM -0700, Brian Norris wrote:
> > > On Fri, Mar 24, 2017 at 09:25:41AM -0500, Bjorn Helgaas wrote:
> > > > These don't have .remove:
> > > > 
> > > >   imx6_pcie_driver
> > > >   ls_pcie_driver
> > > >   armada8k_pcie_driver
> > > >   artpec6_pcie_driver
> > > >   dw_plat_pcie_driver
> > > >   hisi_pcie_driver
> > > >   hisi_pcie_almost_ecam_driver
> > > >   spear13xx_pcie_driver
> > > >   gen_pci_driver
> > > 
> > > I think these are all technically broken.
> > 
> > Can we fix them all at the same time as you fix Rockchip?  Maybe we
> > should have a series that adds ".suppress_bind_attrs = true" to all
> > these drivers,
> 
> Sure, I can do that.
> 
> > including Rockchip.
> 
> Huh? Why? So I can revert that in the next patch?
> 
> > Then you could have this current 
> > series to make Rockchip modular on top, if there's still value in it.
> 
> I do see value in it. That's the whole reason I wrote this patchset.
> It's useful for stressing out certain behaviors that will happen all the
> time (i.e., boot-time initialization, from platform probe, to bus init,
> to client/EP init), via repeated bind/unbind (or modprobe/rmmod). It's
> much faster than reboot testing.

I didn't phrase that very well.  There's certainly value in stressing
the bind/unbind paths, but I thought the primary reason you wrote this
was to fix the fact that you could crash the system like this:

  # echo f800.pcie > /sys/bus/platform/drivers/rockchip-pcie/unbind
  # lspci

From my point of view, that's the issue that *has* to be fixed.
Better test coverage is icing.

It sounds like several drivers have that same issue, and the simplest
possible fix is to set .suppress_bind_attrs, so I suggested doing that 
so it's easy to analyze the tree as a whole and say "these drivers
all have the same problem, and all the fixes look the same."

I guess if you'd rather skip that for Rockchip and apply a more
complicated fix there, I could go along with that.  But I don't think
it would hurt anything to set .suppress_bind_attrs, then remove it
when you add module support.  The concepts of .suppress_bind_attrs and
modularity are related, and doing this in a separate patch would make
it a nice example to follow if somebody wants to make other drivers
modular as well.

> Personally, I'd rather just patch the other drivers, and you can wait
> until I follow through on that promise before applying my existing work
> for the Rockchip driver, if that's what you'd prefer.

It's not so much a question of using the Rockchip change as a stick.
I'm just thinking that it makes a more logical progression to fix the
more important issue globally first.

> > If we find a common problem, I'd like to fix it everywhere we know
> > about so it doesn't get forgotten or copied to even more places.
> 
> Sure. But you only just pointed out how broken several drivers were; I
> didn't really notice :)

Yeah, you're right, I had in my head the idea that if we've identified
the same problem in several drivers, we should fix them all, but I
neglected to turn that into words.

Bjorn


Re: [PATCH RFC] staging: ks7010: remove custom Michael MIC implementation

2017-03-30 Thread Joe Perches
On Fri, 2017-03-31 at 15:47 +1100, Tobin C. Harding wrote:
> ks7010 currently uses a custom implementation of the Michael MIC
> algorithm. The kernel has an implementation of this algorithm
> already, we should use it.

ok, trivia:

Do please run your patch through checkpatch and fix a few style nits.

$ ./scripts/checkpatch.pl ~/1.mbox --strict --terse | cut -f2- -d":"
161: WARNING: line over 80 characters
170: WARNING: Missing a blank line after declarations
205: WARNING: line over 80 characters
229: WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
263: WARNING: Prefer using "%s", __func__ to embedded function names
264: ERROR: code indent should use tabs where possible
264: WARNING: quoted string split across lines
272: WARNING: Prefer using "%s", __func__ to embedded function names
273: ERROR: code indent should use tabs where possible
273: WARNING: quoted string split across lines
325: WARNING: Prefer pr_warn(... to pr_warning(...
 2 errors, 9 warnings, 0 checks, 262 lines checked

and

> diff --git a/drivers/staging/ks7010/mic.c b/drivers/staging/ks7010/mic.c
[]
> +int ks_wlan_mic(struct crypto_shash *tfm_michael, u8 *key,
> + u8 priority, u8 *data, size_t data_len, u8 *mic)
> +{
> + SHASH_DESC_ON_STACK(desc, tfm_michael);
> + u8 hdr[ETH_HLEN + 2]; /* 16 bytes */

It might be better to declare a struct for this

> + hdr[ETH_ALEN * 2] = priority;
> + hdr[ETH_ALEN * 2 + 1] = 0;
> + hdr[ETH_ALEN * 2 + 2] = 0;
> + hdr[ETH_ALEN * 2 + 3] = 0;

And use struct members here.



[PATCH v4 03/11] Move GET_FIELD/SET_FIELD to vas.h

2017-03-30 Thread Sukadev Bhattiprolu
Move the GET_FIELD and SET_FIELD macros to vas.h as VAS and other
users of VAS, including NX-842 can use those macros.

There is a lot of related code between the VAS/NX kernel drivers
and skiboot. For consistency switch the order of parameters in
SET_FIELD to match the order in skiboot.

Signed-off-by: Sukadev Bhattiprolu 
Reviewed-by: Dan Streetman 
---

Changelog[v3]
- Fix order of parameters in nx-842 driver.
---
 arch/powerpc/include/uapi/asm/vas.h | 8 
 drivers/crypto/nx/nx-842-powernv.c  | 7 ---
 drivers/crypto/nx/nx-842.h  | 5 -
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/vas.h 
b/arch/powerpc/include/uapi/asm/vas.h
index ddfe046..21249f5 100644
--- a/arch/powerpc/include/uapi/asm/vas.h
+++ b/arch/powerpc/include/uapi/asm/vas.h
@@ -22,4 +22,12 @@
 #define VAS_THRESH_FIFO_GT_QTR_FULL2
 #define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3
 
+/*
+ * Get/Set bit fields
+ */
+#define GET_FIELD(m, v)(((v) & (m)) >> MASK_LSH(m))
+#define MASK_LSH(m)(__builtin_ffsl(m) - 1)
+#define SET_FIELD(m, v, val)   \
+   (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_LSH(m)) & (m)))
+
 #endif /* _UAPI_MISC_VAS_H */
diff --git a/drivers/crypto/nx/nx-842-powernv.c 
b/drivers/crypto/nx/nx-842-powernv.c
index 1710f80..3abb045 100644
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -22,6 +22,7 @@
 
 #include 
 #include 
+#include 
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Dan Streetman ");
@@ -424,9 +425,9 @@ static int nx842_powernv_function(const unsigned char *in, 
unsigned int inlen,
 
/* set up CCW */
ccw = 0;
-   ccw = SET_FIELD(ccw, CCW_CT, nx842_ct);
-   ccw = SET_FIELD(ccw, CCW_CI_842, 0); /* use 0 for hw auto-selection */
-   ccw = SET_FIELD(ccw, CCW_FC_842, fc);
+   ccw = SET_FIELD(CCW_CT, ccw, nx842_ct);
+   ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */
+   ccw = SET_FIELD(CCW_FC_842, ccw, fc);
 
/* set up CRB's CSB addr */
csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h
index a4eee3b..30929bd 100644
--- a/drivers/crypto/nx/nx-842.h
+++ b/drivers/crypto/nx/nx-842.h
@@ -100,11 +100,6 @@ static inline unsigned long nx842_get_pa(void *addr)
return page_to_phys(vmalloc_to_page(addr)) + offset_in_page(addr);
 }
 
-/* Get/Set bit fields */
-#define MASK_LSH(m)(__builtin_ffsl(m) - 1)
-#define GET_FIELD(v, m)(((v) & (m)) >> MASK_LSH(m))
-#define SET_FIELD(v, m, val)   (((v) & ~(m)) | (((val) << MASK_LSH(m)) & (m)))
-
 /**
  * This provides the driver's constraints.  Different nx842 implementations
  * may have varying requirements.  The constraints are:
-- 
2.7.4



[PATCH v4 03/11] Move GET_FIELD/SET_FIELD to vas.h

2017-03-30 Thread Sukadev Bhattiprolu
Move the GET_FIELD and SET_FIELD macros to vas.h as VAS and other
users of VAS, including NX-842 can use those macros.

There is a lot of related code between the VAS/NX kernel drivers
and skiboot. For consistency switch the order of parameters in
SET_FIELD to match the order in skiboot.

Signed-off-by: Sukadev Bhattiprolu 
Reviewed-by: Dan Streetman 
---

Changelog[v3]
- Fix order of parameters in nx-842 driver.
---
 arch/powerpc/include/uapi/asm/vas.h | 8 
 drivers/crypto/nx/nx-842-powernv.c  | 7 ---
 drivers/crypto/nx/nx-842.h  | 5 -
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/vas.h 
b/arch/powerpc/include/uapi/asm/vas.h
index ddfe046..21249f5 100644
--- a/arch/powerpc/include/uapi/asm/vas.h
+++ b/arch/powerpc/include/uapi/asm/vas.h
@@ -22,4 +22,12 @@
 #define VAS_THRESH_FIFO_GT_QTR_FULL2
 #define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3
 
+/*
+ * Get/Set bit fields
+ */
+#define GET_FIELD(m, v)(((v) & (m)) >> MASK_LSH(m))
+#define MASK_LSH(m)(__builtin_ffsl(m) - 1)
+#define SET_FIELD(m, v, val)   \
+   (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_LSH(m)) & (m)))
+
 #endif /* _UAPI_MISC_VAS_H */
diff --git a/drivers/crypto/nx/nx-842-powernv.c 
b/drivers/crypto/nx/nx-842-powernv.c
index 1710f80..3abb045 100644
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -22,6 +22,7 @@
 
 #include 
 #include 
+#include 
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Dan Streetman ");
@@ -424,9 +425,9 @@ static int nx842_powernv_function(const unsigned char *in, 
unsigned int inlen,
 
/* set up CCW */
ccw = 0;
-   ccw = SET_FIELD(ccw, CCW_CT, nx842_ct);
-   ccw = SET_FIELD(ccw, CCW_CI_842, 0); /* use 0 for hw auto-selection */
-   ccw = SET_FIELD(ccw, CCW_FC_842, fc);
+   ccw = SET_FIELD(CCW_CT, ccw, nx842_ct);
+   ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */
+   ccw = SET_FIELD(CCW_FC_842, ccw, fc);
 
/* set up CRB's CSB addr */
csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h
index a4eee3b..30929bd 100644
--- a/drivers/crypto/nx/nx-842.h
+++ b/drivers/crypto/nx/nx-842.h
@@ -100,11 +100,6 @@ static inline unsigned long nx842_get_pa(void *addr)
return page_to_phys(vmalloc_to_page(addr)) + offset_in_page(addr);
 }
 
-/* Get/Set bit fields */
-#define MASK_LSH(m)(__builtin_ffsl(m) - 1)
-#define GET_FIELD(v, m)(((v) & (m)) >> MASK_LSH(m))
-#define SET_FIELD(v, m, val)   (((v) & ~(m)) | (((val) << MASK_LSH(m)) & (m)))
-
 /**
  * This provides the driver's constraints.  Different nx842 implementations
  * may have varying requirements.  The constraints are:
-- 
2.7.4



Re: [PATCH RFC] staging: ks7010: remove custom Michael MIC implementation

2017-03-30 Thread Joe Perches
On Fri, 2017-03-31 at 15:47 +1100, Tobin C. Harding wrote:
> ks7010 currently uses a custom implementation of the Michael MIC
> algorithm. The kernel has an implementation of this algorithm
> already, we should use it.

ok, trivia:

Do please run your patch through checkpatch and fix a few style nits.

$ ./scripts/checkpatch.pl ~/1.mbox --strict --terse | cut -f2- -d":"
161: WARNING: line over 80 characters
170: WARNING: Missing a blank line after declarations
205: WARNING: line over 80 characters
229: WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
263: WARNING: Prefer using "%s", __func__ to embedded function names
264: ERROR: code indent should use tabs where possible
264: WARNING: quoted string split across lines
272: WARNING: Prefer using "%s", __func__ to embedded function names
273: ERROR: code indent should use tabs where possible
273: WARNING: quoted string split across lines
325: WARNING: Prefer pr_warn(... to pr_warning(...
 2 errors, 9 warnings, 0 checks, 262 lines checked

and

> diff --git a/drivers/staging/ks7010/mic.c b/drivers/staging/ks7010/mic.c
[]
> +int ks_wlan_mic(struct crypto_shash *tfm_michael, u8 *key,
> + u8 priority, u8 *data, size_t data_len, u8 *mic)
> +{
> + SHASH_DESC_ON_STACK(desc, tfm_michael);
> + u8 hdr[ETH_HLEN + 2]; /* 16 bytes */

It might be better to declare a struct for this

> + hdr[ETH_ALEN * 2] = priority;
> + hdr[ETH_ALEN * 2 + 1] = 0;
> + hdr[ETH_ALEN * 2 + 2] = 0;
> + hdr[ETH_ALEN * 2 + 3] = 0;

And use struct members here.



[PATCH v4 00/11] Enable VAS

2017-03-30 Thread Sukadev Bhattiprolu
Power9 introduces a hardware subsystem referred to as the Virtual
Accelerator Switchboard (VAS). VAS allows kernel subsystems and user
space processes to directly access the Nest Accelerator (NX) engines
which implement compression and encryption algorithms in the hardware.

NX has been in Power processors since Power7+, but access to the NX
engines was through the 'icswx' instruction which is only available
to the kernel/hypervisor. Starting with Power9, access to the NX
engines is provided to both kernel and user space processes through
VAS.

The switchboard (i.e., VAS) multiplexes accesses between "receivers" and
"senders", where the "receivers" are typically the NX engines and
"senders" are the kernel subsystems and user processes that wish to
access the receivers (NX engines).  Once a sender is "connected" to
a receiver through the switchboard, the senders can submit compression/
encryption requests to the hardware using the new (PowerISA 3.0)
"copy" and "paste" instructions.

In the initial OPAL and PowerNV kernel patchsets, the "senders" can
only be kernel subsystems (eg NX-842 driver). A follow-on patch set 
will allow senders to be user-space processes.

This kernel patch set configures the VAS subsystems and provides
kernel interfaces to drivers like NX-842 to open receive and send
windows in VAS and to submit requests to the NX engine.

This patch set has been tested in a Simics Power9 environment using
a modified NX-842 kernel driver and a compression self-test module from
Power8. The corresponding OPAL patchset for VAS support was posted to
skiboot mailing list:

https://lists.ozlabs.org/pipermail/skiboot/2017-January/006193.html

OPAL and kernel patchsets for NX-842 driver will be posted separately.
All four patchsets are needed to effectively use VAS/NX in Power9.

Thanks to input from Ben Herrenschmidt, Michael Neuling, Michael Ellerman
and Haren Myneni.

Changelog[v4]
Comments from Michael Neuling:
- Move VAS code from drivers/misc/vas to arch/powerpc/platforms/powernv
  since VAS only provides interfaces to other drivers like NX-842.
- Drop vas-internal.h and use vas.h in separate dirs for VAS
  internal, kernel API and user API
- Rather than create 6 separate device tree properties windows
  and window context, combine them into 6 "reg" properties.
- Drop vas_window_reset() since windows are reset/cleared before
  being assigned to kernel/users.
- Use ilog2() and radix_enabled() helpers

Changelog[v3]
- Rebase to v4.11-rc1
- Add interfaces to initialize send/receive window attributes to
  defaults that drivers can use (see arch/powerpc/include/asm/vas.h)
- Modify interface vas_paste() to return 0 or error code
- Fix a bug in setting Translation Control Mode (0b11 not 0x11)
- Enable send-window-credit checking 
- Reorg code  in vas_win_close()
- Minor reorgs and tweaks to register field settings to make it
  easier to add support for user space windows.
- Skip writing to read-only registers
- Start window indexing from 0 rather than 1

Changelog[v2]
- Use vas-id, HVWC, UWC and paste address, entries from device tree
  rather than defining/computing them in kernel and reorg code.

Sukadev Bhattiprolu (11):
  Add Power9 PVR
  VAS: Define macros, register fields and structures
  Move GET_FIELD/SET_FIELD to vas.h
  VAS: Define vas_init() and vas_exit()
  VAS: Define helpers for access MMIO regions
  VAS: Define helpers to init window context
  VAS: Define helpers to alloc/free windows
  VAS: Define vas_rx_win_open() interface
  VAS: Define vas_win_close() interface
  VAS: Define vas_tx_win_open()
  VAS: Define copy/paste interfaces

 arch/powerpc/include/asm/reg.h  |1 +
 arch/powerpc/include/asm/vas.h  |  141 
 arch/powerpc/include/uapi/asm/vas.h |   33 +
 arch/powerpc/platforms/powernv/Kconfig  |   14 +
 arch/powerpc/platforms/powernv/Makefile |1 +
 arch/powerpc/platforms/powernv/copy-paste.h |   74 ++
 arch/powerpc/platforms/powernv/vas-window.c | 1003 +++
 arch/powerpc/platforms/powernv/vas.c|  145 
 arch/powerpc/platforms/powernv/vas.h|  470 +
 drivers/crypto/nx/nx-842-powernv.c  |7 +-
 drivers/crypto/nx/nx-842.h  |5 -
 11 files changed, 1886 insertions(+), 8 deletions(-)
 create mode 100644 arch/powerpc/include/asm/vas.h
 create mode 100644 arch/powerpc/include/uapi/asm/vas.h
 create mode 100644 arch/powerpc/platforms/powernv/copy-paste.h
 create mode 100644 arch/powerpc/platforms/powernv/vas-window.c
 create mode 100644 arch/powerpc/platforms/powernv/vas.c
 create mode 100644 arch/powerpc/platforms/powernv/vas.h

-- 
2.7.4



[PATCH v4 00/11] Enable VAS

2017-03-30 Thread Sukadev Bhattiprolu
Power9 introduces a hardware subsystem referred to as the Virtual
Accelerator Switchboard (VAS). VAS allows kernel subsystems and user
space processes to directly access the Nest Accelerator (NX) engines
which implement compression and encryption algorithms in the hardware.

NX has been in Power processors since Power7+, but access to the NX
engines was through the 'icswx' instruction which is only available
to the kernel/hypervisor. Starting with Power9, access to the NX
engines is provided to both kernel and user space processes through
VAS.

The switchboard (i.e., VAS) multiplexes accesses between "receivers" and
"senders", where the "receivers" are typically the NX engines and
"senders" are the kernel subsystems and user processes that wish to
access the receivers (NX engines).  Once a sender is "connected" to
a receiver through the switchboard, the senders can submit compression/
encryption requests to the hardware using the new (PowerISA 3.0)
"copy" and "paste" instructions.

In the initial OPAL and PowerNV kernel patchsets, the "senders" can
only be kernel subsystems (eg NX-842 driver). A follow-on patch set 
will allow senders to be user-space processes.

This kernel patch set configures the VAS subsystems and provides
kernel interfaces to drivers like NX-842 to open receive and send
windows in VAS and to submit requests to the NX engine.

This patch set has been tested in a Simics Power9 environment using
a modified NX-842 kernel driver and a compression self-test module from
Power8. The corresponding OPAL patchset for VAS support was posted to
skiboot mailing list:

https://lists.ozlabs.org/pipermail/skiboot/2017-January/006193.html

OPAL and kernel patchsets for NX-842 driver will be posted separately.
All four patchsets are needed to effectively use VAS/NX in Power9.

Thanks to input from Ben Herrenschmidt, Michael Neuling, Michael Ellerman
and Haren Myneni.

Changelog[v4]
Comments from Michael Neuling:
- Move VAS code from drivers/misc/vas to arch/powerpc/platforms/powernv
  since VAS only provides interfaces to other drivers like NX-842.
- Drop vas-internal.h and use vas.h in separate dirs for VAS
  internal, kernel API and user API
- Rather than create 6 separate device tree properties windows
  and window context, combine them into 6 "reg" properties.
- Drop vas_window_reset() since windows are reset/cleared before
  being assigned to kernel/users.
- Use ilog2() and radix_enabled() helpers

Changelog[v3]
- Rebase to v4.11-rc1
- Add interfaces to initialize send/receive window attributes to
  defaults that drivers can use (see arch/powerpc/include/asm/vas.h)
- Modify interface vas_paste() to return 0 or error code
- Fix a bug in setting Translation Control Mode (0b11 not 0x11)
- Enable send-window-credit checking 
- Reorg code  in vas_win_close()
- Minor reorgs and tweaks to register field settings to make it
  easier to add support for user space windows.
- Skip writing to read-only registers
- Start window indexing from 0 rather than 1

Changelog[v2]
- Use vas-id, HVWC, UWC and paste address, entries from device tree
  rather than defining/computing them in kernel and reorg code.

Sukadev Bhattiprolu (11):
  Add Power9 PVR
  VAS: Define macros, register fields and structures
  Move GET_FIELD/SET_FIELD to vas.h
  VAS: Define vas_init() and vas_exit()
  VAS: Define helpers for access MMIO regions
  VAS: Define helpers to init window context
  VAS: Define helpers to alloc/free windows
  VAS: Define vas_rx_win_open() interface
  VAS: Define vas_win_close() interface
  VAS: Define vas_tx_win_open()
  VAS: Define copy/paste interfaces

 arch/powerpc/include/asm/reg.h  |1 +
 arch/powerpc/include/asm/vas.h  |  141 
 arch/powerpc/include/uapi/asm/vas.h |   33 +
 arch/powerpc/platforms/powernv/Kconfig  |   14 +
 arch/powerpc/platforms/powernv/Makefile |1 +
 arch/powerpc/platforms/powernv/copy-paste.h |   74 ++
 arch/powerpc/platforms/powernv/vas-window.c | 1003 +++
 arch/powerpc/platforms/powernv/vas.c|  145 
 arch/powerpc/platforms/powernv/vas.h|  470 +
 drivers/crypto/nx/nx-842-powernv.c  |7 +-
 drivers/crypto/nx/nx-842.h  |5 -
 11 files changed, 1886 insertions(+), 8 deletions(-)
 create mode 100644 arch/powerpc/include/asm/vas.h
 create mode 100644 arch/powerpc/include/uapi/asm/vas.h
 create mode 100644 arch/powerpc/platforms/powernv/copy-paste.h
 create mode 100644 arch/powerpc/platforms/powernv/vas-window.c
 create mode 100644 arch/powerpc/platforms/powernv/vas.c
 create mode 100644 arch/powerpc/platforms/powernv/vas.h

-- 
2.7.4



[PATCH v4 01/11] Add Power9 PVR

2017-03-30 Thread Sukadev Bhattiprolu
Add Power9 PVR

Signed-off-by: Sukadev Bhattiprolu 
---
 arch/powerpc/include/asm/reg.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fc879fd..7a45ff7 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1225,6 +1225,7 @@
 #define PVR_POWER8E0x004B
 #define PVR_POWER8NVL  0x004C
 #define PVR_POWER8 0x004D
+#define PVR_POWER9 0x004E
 #define PVR_BE 0x0070
 #define PVR_PA6T   0x0090
 
-- 
2.7.4



[PATCH v4 01/11] Add Power9 PVR

2017-03-30 Thread Sukadev Bhattiprolu
Add Power9 PVR

Signed-off-by: Sukadev Bhattiprolu 
---
 arch/powerpc/include/asm/reg.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fc879fd..7a45ff7 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1225,6 +1225,7 @@
 #define PVR_POWER8E0x004B
 #define PVR_POWER8NVL  0x004C
 #define PVR_POWER8 0x004D
+#define PVR_POWER9 0x004E
 #define PVR_BE 0x0070
 #define PVR_PA6T   0x0090
 
-- 
2.7.4



[PATCH v4 07/11] VAS: Define helpers to alloc/free windows

2017-03-30 Thread Sukadev Bhattiprolu
Define helpers to allocate/free VAS window objects. These will
be used in follow-on patches when opening/closing windows.

Signed-off-by: Sukadev Bhattiprolu 
---
 arch/powerpc/platforms/powernv/vas-window.c | 63 -
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 5ddcb63..0c178fe 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -120,7 +120,7 @@ static void unmap_wc_mmio_bars(struct vas_window *window)
  * OS/User Window Context (UWC) MMIO Base Address Region for the given window.
  * Map these bus addresses and save the mapped kernel addresses in @window.
  */
-int map_wc_mmio_bars(struct vas_window *window)
+static int map_wc_mmio_bars(struct vas_window *window)
 {
int len;
uint64_t start;
@@ -437,6 +437,67 @@ int init_winctx_regs(struct vas_window *window, struct 
vas_winctx *winctx)
return 0;
 }
 
+DEFINE_SPINLOCK(vas_ida_lock);
+
+void vas_release_window_id(struct ida *ida, int winid)
+{
+   spin_lock(_ida_lock);
+   ida_remove(ida, winid);
+   spin_unlock(_ida_lock);
+}
+
+int vas_assign_window_id(struct ida *ida)
+{
+   int rc, winid;
+
+   rc = ida_pre_get(ida, GFP_KERNEL);
+   if (!rc)
+   return -EAGAIN;
+
+   spin_lock(_ida_lock);
+   rc = ida_get_new_above(ida, 0, );
+   spin_unlock(_ida_lock);
+
+   if (rc)
+   return rc;
+
+   if (winid > VAS_MAX_WINDOWS_PER_CHIP) {
+   pr_err("VAS: Too many (%d) open windows\n", winid);
+   vas_release_window_id(ida, winid);
+   return -EAGAIN;
+   }
+
+   return winid;
+}
+
+void vas_window_free(struct vas_window *window)
+{
+   unmap_wc_mmio_bars(window);
+   kfree(window->paste_addr_name);
+   kfree(window);
+}
+
+struct vas_window *vas_window_alloc(struct vas_instance *vinst, int id)
+{
+   struct vas_window *window;
+
+   window = kzalloc(sizeof(*window), GFP_KERNEL);
+   if (!window)
+   return NULL;
+
+   window->vinst = vinst;
+   window->winid = id;
+
+   if (map_wc_mmio_bars(window))
+   goto out_free;
+
+   return window;
+
+out_free:
+   kfree(window);
+   return NULL;
+}
+
 /* stub for now */
 int vas_win_close(struct vas_window *window)
 {
-- 
2.7.4



[PATCH v4 07/11] VAS: Define helpers to alloc/free windows

2017-03-30 Thread Sukadev Bhattiprolu
Define helpers to allocate/free VAS window objects. These will
be used in follow-on patches when opening/closing windows.

Signed-off-by: Sukadev Bhattiprolu 
---
 arch/powerpc/platforms/powernv/vas-window.c | 63 -
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 5ddcb63..0c178fe 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -120,7 +120,7 @@ static void unmap_wc_mmio_bars(struct vas_window *window)
  * OS/User Window Context (UWC) MMIO Base Address Region for the given window.
  * Map these bus addresses and save the mapped kernel addresses in @window.
  */
-int map_wc_mmio_bars(struct vas_window *window)
+static int map_wc_mmio_bars(struct vas_window *window)
 {
int len;
uint64_t start;
@@ -437,6 +437,67 @@ int init_winctx_regs(struct vas_window *window, struct 
vas_winctx *winctx)
return 0;
 }
 
+DEFINE_SPINLOCK(vas_ida_lock);
+
+void vas_release_window_id(struct ida *ida, int winid)
+{
+   spin_lock(_ida_lock);
+   ida_remove(ida, winid);
+   spin_unlock(_ida_lock);
+}
+
+int vas_assign_window_id(struct ida *ida)
+{
+   int rc, winid;
+
+   rc = ida_pre_get(ida, GFP_KERNEL);
+   if (!rc)
+   return -EAGAIN;
+
+   spin_lock(_ida_lock);
+   rc = ida_get_new_above(ida, 0, );
+   spin_unlock(_ida_lock);
+
+   if (rc)
+   return rc;
+
+   if (winid > VAS_MAX_WINDOWS_PER_CHIP) {
+   pr_err("VAS: Too many (%d) open windows\n", winid);
+   vas_release_window_id(ida, winid);
+   return -EAGAIN;
+   }
+
+   return winid;
+}
+
+void vas_window_free(struct vas_window *window)
+{
+   unmap_wc_mmio_bars(window);
+   kfree(window->paste_addr_name);
+   kfree(window);
+}
+
+struct vas_window *vas_window_alloc(struct vas_instance *vinst, int id)
+{
+   struct vas_window *window;
+
+   window = kzalloc(sizeof(*window), GFP_KERNEL);
+   if (!window)
+   return NULL;
+
+   window->vinst = vinst;
+   window->winid = id;
+
+   if (map_wc_mmio_bars(window))
+   goto out_free;
+
+   return window;
+
+out_free:
+   kfree(window);
+   return NULL;
+}
+
 /* stub for now */
 int vas_win_close(struct vas_window *window)
 {
-- 
2.7.4



[PATCH v4 06/11] VAS: Define helpers to init window context

2017-03-30 Thread Sukadev Bhattiprolu
Define helpers to initialize window context registers of the VAS
hardware. These will be used in follow-on patches when opening/closing
VAS windows.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog[v4]
- [Michael Neuling] Use ilog2(), radix_enabled() helpers;
  drop warning when 32-bit app uses VAS (a follow-on patch
  will check and return error). Set MSR_PR state to 0 for
  kernel (rather than reading from MSR).

Changelog[v3]
- Have caller, rather than init_xlate_regs() reset window regs
  so we don't reset any settings caller may already have set.
- Translation mode should be 0x3 (0b11) not 0x11.
- Skip initializing read-only registers NX_UTIL and NX_UTIL_SE
- Skip initializing adder registers from UWC - they are already
  initialized from the HVWC.
- Check winctx->user_win when setting translation registers
---
 arch/powerpc/platforms/powernv/vas-window.c | 299 
 arch/powerpc/platforms/powernv/vas.h|  55 +
 2 files changed, 354 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index ec084d2..5ddcb63 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -11,9 +11,12 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "vas.h"
 
+static int fault_winid;
+
 /*
  * Compute the paste address region for the window @window using the
  * ->win_base_addr and ->win_id_shift we got from device tree.
@@ -138,6 +141,302 @@ int map_wc_mmio_bars(struct vas_window *window)
return 0;
 }
 
+/*
+ * Reset all valid registers in the HV and OS/User Window Contexts for
+ * the window identified by @window.
+ *
+ * NOTE: We cannot really use a for loop to reset window context. Not all
+ *  offsets in a window context are valid registers and the valid
+ *  registers are not sequential. And, we can only write to offsets
+ *  with valid registers (or is that only in Simics?).
+ */
+void reset_window_regs(struct vas_window *window)
+{
+   write_hvwc_reg(window, VREG(LPID), 0ULL);
+   write_hvwc_reg(window, VREG(PID), 0ULL);
+   write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
+   write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
+   write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
+   write_hvwc_reg(window, VREG(AMR), 0ULL);
+   write_hvwc_reg(window, VREG(SEIDR), 0ULL);
+   write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
+   write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+   write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
+   write_hvwc_reg(window, VREG(PSWID), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE1), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE2), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE3), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE4), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE5), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE6), 0ULL);
+   write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
+   write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
+   write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
+   write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+   write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+   write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
+   write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+   write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
+   write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+   write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL);
+   write_hvwc_reg(window, VREG(WINCTL), 0ULL);
+   write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+   write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL);
+   write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL);
+   write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL);
+   write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+
+   /* Skip read-only registers: NX_UTIL and NX_UTIL_SE */
+
+   /*
+* The send and receive window credit adder registers are also
+* accessible from HVWC and have been initialized above. We don't
+* need to initialize from the OS/User Window Context, so skip
+* following calls:
+*
+*  write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+*  write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+*/
+}
+
+/*
+ * Initialize window context registers related to Address Translation.
+ * These registers are common to send/receive windows although they
+ * differ for 

[PATCH v4 06/11] VAS: Define helpers to init window context

2017-03-30 Thread Sukadev Bhattiprolu
Define helpers to initialize window context registers of the VAS
hardware. These will be used in follow-on patches when opening/closing
VAS windows.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog[v4]
- [Michael Neuling] Use ilog2(), radix_enabled() helpers;
  drop warning when 32-bit app uses VAS (a follow-on patch
  will check and return error). Set MSR_PR state to 0 for
  kernel (rather than reading from MSR).

Changelog[v3]
- Have caller, rather than init_xlate_regs() reset window regs
  so we don't reset any settings caller may already have set.
- Translation mode should be 0x3 (0b11) not 0x11.
- Skip initializing read-only registers NX_UTIL and NX_UTIL_SE
- Skip initializing adder registers from UWC - they are already
  initialized from the HVWC.
- Check winctx->user_win when setting translation registers
---
 arch/powerpc/platforms/powernv/vas-window.c | 299 
 arch/powerpc/platforms/powernv/vas.h|  55 +
 2 files changed, 354 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index ec084d2..5ddcb63 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -11,9 +11,12 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "vas.h"
 
+static int fault_winid;
+
 /*
  * Compute the paste address region for the window @window using the
  * ->win_base_addr and ->win_id_shift we got from device tree.
@@ -138,6 +141,302 @@ int map_wc_mmio_bars(struct vas_window *window)
return 0;
 }
 
+/*
+ * Reset all valid registers in the HV and OS/User Window Contexts for
+ * the window identified by @window.
+ *
+ * NOTE: We cannot really use a for loop to reset window context. Not all
+ *  offsets in a window context are valid registers and the valid
+ *  registers are not sequential. And, we can only write to offsets
+ *  with valid registers (or is that only in Simics?).
+ */
+void reset_window_regs(struct vas_window *window)
+{
+   write_hvwc_reg(window, VREG(LPID), 0ULL);
+   write_hvwc_reg(window, VREG(PID), 0ULL);
+   write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
+   write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
+   write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
+   write_hvwc_reg(window, VREG(AMR), 0ULL);
+   write_hvwc_reg(window, VREG(SEIDR), 0ULL);
+   write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
+   write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+   write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
+   write_hvwc_reg(window, VREG(PSWID), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE1), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE2), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE3), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE4), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE5), 0ULL);
+   write_hvwc_reg(window, VREG(SPARE6), 0ULL);
+   write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
+   write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
+   write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
+   write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+   write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+   write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
+   write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+   write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
+   write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+   write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL);
+   write_hvwc_reg(window, VREG(WINCTL), 0ULL);
+   write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+   write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL);
+   write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL);
+   write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL);
+   write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL);
+   write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+
+   /* Skip read-only registers: NX_UTIL and NX_UTIL_SE */
+
+   /*
+* The send and receive window credit adder registers are also
+* accessible from HVWC and have been initialized above. We don't
+* need to initialize from the OS/User Window Context, so skip
+* following calls:
+*
+*  write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+*  write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+*/
+}
+
+/*
+ * Initialize window context registers related to Address Translation.
+ * These registers are common to send/receive windows although they
+ * differ for user/kernel windows. As we resolve 

[PATCH v4 08/11] VAS: Define vas_rx_win_open() interface

2017-03-30 Thread Sukadev Bhattiprolu
Define the vas_rx_win_open() interface. This interface is intended to be
used by the Nest Accelerator (NX) driver(s) to setup receive windows for
one or more NX engines (which implement compression/encryption algorithms
in the hardware).

Follow-on patches will provide an interface to close the window and to open
a send window that kernel subsystems can use to access the NX engines.

The interface to open a receive window is expected to be invoked for each
instance of VAS in the system.

Signed-off-by: Sukadev Bhattiprolu 
---

Changelog[v3]:
- Fault receive windows must enable interrupts and disable
  notifications. NX Windows are opposite.
- Use macros rather than enum for threshold-control mode
- Ignore irq_ports for in-kernel windows. They are needed for
  user space windows and will be added later
---
 arch/powerpc/include/asm/vas.h  |  45 ++
 arch/powerpc/platforms/powernv/vas-window.c | 205 +++-
 arch/powerpc/platforms/powernv/vas.h|  11 ++
 3 files changed, 260 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index e2575d5..09ea6f7 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -31,4 +31,49 @@ enum vas_cop_type {
VAS_COP_TYPE_MAX,
 };
 
+/*
+ * Receive window attributes specified by the (in-kernel) owner of window.
+ */
+struct vas_rx_win_attr {
+   void *rx_fifo;
+   int rx_fifo_size;
+   int wcreds_max;
+
+   bool pin_win;
+   bool rej_no_credit;
+   bool tx_wcred_mode;
+   bool rx_wcred_mode;
+   bool tx_win_ord_mode;
+   bool rx_win_ord_mode;
+   bool data_stamp;
+   bool nx_win;
+   bool fault_win;
+   bool notify_disable;
+   bool intr_disable;
+   bool notify_early;
+
+   int lnotify_lpid;
+   int lnotify_pid;
+   int lnotify_tid;
+   int pswid;
+
+   int tc_mode;
+};
+
+/*
+ * Helper to initialize receive window attributes to defaults for an
+ * NX window.
+ */
+extern void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr,
+   enum vas_cop_type cop);
+
+/*
+ * Open a VAS receive window for the instance of VAS identified by @vasid
+ * Use @attr to initialize the attributes of the window.
+ *
+ * Return a handle to the window or ERR_PTR() on error.
+ */
+extern struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
+   struct vas_rx_win_attr *attr);
+
 #endif /* _MISC_VAS_H */
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 0c178fe..04f6bb2 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -477,7 +477,7 @@ void vas_window_free(struct vas_window *window)
kfree(window);
 }
 
-struct vas_window *vas_window_alloc(struct vas_instance *vinst, int id)
+static struct vas_window *vas_window_alloc(struct vas_instance *vinst, int id)
 {
struct vas_window *window;
 
@@ -503,3 +503,206 @@ int vas_win_close(struct vas_window *window)
 {
return -1;
 }
+
+struct vas_window *get_vinstance_rxwin(struct vas_instance *vinst,
+   enum vas_cop_type cop)
+{
+   struct vas_window *rxwin;
+
+   mutex_lock(>mutex);
+
+   rxwin = vinst->rxwin[cop];
+   if (rxwin)
+   atomic_inc(>num_txwins);
+
+   mutex_unlock(>mutex);
+
+   return rxwin;
+}
+
+static void set_vinstance_rxwin(struct vas_instance *vinst,
+   enum vas_cop_type cop, struct vas_window *window)
+{
+   mutex_lock(>mutex);
+
+   /*
+* There should only be one receive window for a coprocessor type.
+*/
+   WARN_ON_ONCE(vinst->rxwin[cop]);
+   vinst->rxwin[cop] = window;
+
+   mutex_unlock(>mutex);
+}
+
+static void init_winctx_for_rxwin(struct vas_window *rxwin,
+   struct vas_rx_win_attr *rxattr,
+   struct vas_winctx *winctx)
+{
+   /*
+* We first zero (memset()) all fields and only set non-zero fields.
+* Following fields are 0/false but maybe deserve a comment:
+*
+*  ->user_win  No support for user Rx windows yet
+*  ->notify_os_intr_regIn powerNV, send intrs to HV
+*  ->notify_disableFalse for NX windows
+*  ->intr_disable  False for Fault Windows
+*  ->xtra_writeFalse for NX windows
+*  ->notify_early  NA for NX windows
+*  ->rsvd_txbuf_count  NA for Rx windows
+*  ->lpid, ->pid, ->tidNA for Rx windows
+*/
+
+   memset(winctx, 0, sizeof(struct vas_winctx));
+
+   winctx->rx_fifo = rxattr->rx_fifo;
+   winctx->rx_fifo_size = rxattr->rx_fifo_size;
+   winctx->wcreds_max = rxattr->wcreds_max ?: 

[PATCH v4 09/11] VAS: Define vas_win_close() interface

2017-03-30 Thread Sukadev Bhattiprolu
Define the vas_win_close() interface which should be used to close a
send or receive window.

While the hardware configurations required to open send and receive windows
differ, the configuration to close a window is the same for both. So we use
a single interface to close the window.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog[v4]:
- Drop the poll for credits return (we can set the required credit,
  but cannot really find the available credit at a point in time)

Changelog[v3]:
- Fix order of parameters in GET_FIELD().
- Update references and sequence for closing/quiescing a window.
---
 arch/powerpc/include/asm/vas.h  |  7 +++
 arch/powerpc/platforms/powernv/vas-window.c | 96 +++--
 2 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index 09ea6f7..c923b8f 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -76,4 +76,11 @@ extern void vas_init_rx_win_attr(struct vas_rx_win_attr 
*rxattr,
 extern struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
struct vas_rx_win_attr *attr);
 
+/*
+ * Close the send or receive window identified by @win. For receive windows
+ * return -EAGAIN if there are active send windows attached to this receive
+ * window.
+ */
+int vas_win_close(struct vas_window *win);
+
 #endif /* _MISC_VAS_H */
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 04f6bb2..7b1a36c 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -470,7 +470,7 @@ int vas_assign_window_id(struct ida *ida)
return winid;
 }
 
-void vas_window_free(struct vas_window *window)
+static void vas_window_free(struct vas_window *window)
 {
unmap_wc_mmio_bars(window);
kfree(window->paste_addr_name);
@@ -498,10 +498,12 @@ static struct vas_window *vas_window_alloc(struct 
vas_instance *vinst, int id)
return NULL;
 }
 
-/* stub for now */
-int vas_win_close(struct vas_window *window)
+/*
+ * Drop the reference a send window holds on its receive window @rxwin by
+ * decrementing the receive window's count of attached send windows
+ * (->num_txwins). Warns once if @rxwin is marked as a send window.
+ */
+static void put_rx_win(struct vas_window *rxwin)
 {
-   return -1;
+   /* Better not be a send window! */
+   WARN_ON_ONCE(rxwin->tx_win);
+
+   atomic_dec(&rxwin->num_txwins);
 }
 
 struct vas_window *get_vinstance_rxwin(struct vas_instance *vinst,
@@ -706,3 +708,89 @@ struct vas_window *vas_rx_win_open(int vasid, enum 
vas_cop_type cop,
vas_release_window_id(>ida, rxwin->winid);
return ERR_PTR(rc);
 }
+
+/*
+ * Wait for the "Window Busy" flag of @window to clear.
+ *
+ * Re-reads the WIN_STATUS register until VAS_WIN_BUSY is clear, sleeping
+ * between reads. There is no upper bound on the number of retries.
+ */
+static void poll_window_busy_state(struct vas_window *window)
+{
+   uint64_t val;
+
+   for (;;) {
+       val = read_hvwc_reg(window, VREG(WIN_STATUS));
+       if (!GET_FIELD(VAS_WIN_BUSY, val))
+           break;
+
+       /*
+        * A bare schedule_timeout() does not sleep while the task
+        * state is TASK_RUNNING, which would turn this loop into a
+        * busy-wait. The _uninterruptible variant sets the task state
+        * first so that we really sleep between polls.
+        */
+       schedule_timeout_uninterruptible(2000);
+   }
+}
+
+/*
+ * Cast the window context of @window out of the cache.
+ *
+ * While WIN_CTX_CACHING_CTL reports VAS_WIN_CACHE_STATUS as set, write a
+ * cast-out request (VAS_CASTOUT_REQ = 1, VAS_PUSH_TO_MEM = 0) and sleep
+ * before checking again. There is no upper bound on the number of retries.
+ */
+static void poll_window_castout(struct vas_window *window)
+{
+   uint64_t val;
+
+   for (;;) {
+       val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL));
+       if (!GET_FIELD(VAS_WIN_CACHE_STATUS, val))
+           break;
+
+       val = 0ULL;
+       val = SET_FIELD(VAS_CASTOUT_REQ, val, 1);
+       val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0);
+       write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
+
+       /*
+        * A bare schedule_timeout() does not sleep while the task
+        * state is TASK_RUNNING; use the variant that sets the task
+        * state so that we really sleep between requests.
+        */
+       schedule_timeout_uninterruptible(2000);
+   }
+}
+
+/*
+ * Close a window.
+ *
+ * See Section 1.12.1 of VAS workbook v1.05 for details on closing window:
+ * - disable new paste operations (unmap paste address)
+ * - Poll for the "Window Busy" bit to be cleared
+ * - Clear the Open/Enable bit for the Window.
+ * - Poll for return of window Credits (implies FIFO empty for Rx win?)
+ * - Unpin and cast window context out of cache
+ *
+ * Besides the hardware, kernel has some bookkeeping of course.
+ */
+int vas_win_close(struct vas_window *window)
+{
+   uint64_t val;
+
+   if (!window)
+   return 0;
+
+   if (!window->tx_win && atomic_read(>num_txwins) != 0) {
+   pr_devel("VAS: Attempting to close an active Rx window!\n");
+   WARN_ON_ONCE(1);
+   return -EAGAIN;
+   }
+
+   unmap_wc_paste_kaddr(window);
+
+   poll_window_busy_state(window);
+
+   /* Unpin window from cache and close it */
+   val = read_hvwc_reg(window, VREG(WINCTL));
+   val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
+   val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
+   write_hvwc_reg(window, VREG(WINCTL), val);
+
+   poll_window_castout(window);
+
+   /* if send window, drop reference to matching receive window */
+   if (window->tx_win)
+   put_rx_win(window->rxwin);
+
+   

[PATCH v4 08/11] VAS: Define vas_rx_win_open() interface

2017-03-30 Thread Sukadev Bhattiprolu
Define the vas_rx_win_open() interface. This interface is intended to be
used by the Nest Accelerator (NX) driver(s) to setup receive windows for
one or more NX engines (which implement compression/encryption algorithms
in the hardware).

Follow-on patches will provide an interface to close the window and to open
a send window that kernel subsystems can use to access the NX engines.

The interface to open a receive window is expected to be invoked for each
instance of VAS in the system.

Signed-off-by: Sukadev Bhattiprolu 
---

Changelog[v3]:
- Fault receive windows must enable interrupts and disable
  notifications. NX Windows are opposite.
- Use macros rather than enum for threshold-control mode
- Ignore irq_ports for in-kernel windows. They are needed for
  user space windows and will be added later
---
 arch/powerpc/include/asm/vas.h  |  45 ++
 arch/powerpc/platforms/powernv/vas-window.c | 205 +++-
 arch/powerpc/platforms/powernv/vas.h|  11 ++
 3 files changed, 260 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index e2575d5..09ea6f7 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -31,4 +31,49 @@ enum vas_cop_type {
VAS_COP_TYPE_MAX,
 };
 
+/*
+ * Receive window attributes specified by the (in-kernel) owner of window.
+ */
+struct vas_rx_win_attr {
+   void *rx_fifo;      /* copied into the window context's ->rx_fifo */
+   int rx_fifo_size;   /* copied into the window context's ->rx_fifo_size */
+   int wcreds_max;     /* 0 selects VAS_WCREDS_DEFAULT */
+
+   bool pin_win;
+   bool rej_no_credit;
+   bool tx_wcred_mode;
+   bool rx_wcred_mode;
+   bool tx_win_ord_mode;
+   bool rx_win_ord_mode;
+   bool data_stamp;
+   bool nx_win;
+   bool fault_win;
+   bool notify_disable;
+   bool intr_disable;
+   bool notify_early;
+
+   int lnotify_lpid;
+   int lnotify_pid;
+   int lnotify_tid;
+   int pswid;
+
+   int tc_mode;        /* threshold-control mode (macro values, not an enum) */
+};
+
+/*
+ * Helper to initialize receive window attributes to defaults for an
+ * NX window.
+ */
+extern void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr,
+   enum vas_cop_type cop);
+
+/*
+ * Open a VAS receive window for the instance of VAS identified by @vasid
+ * Use @attr to initialize the attributes of the window.
+ *
+ * Return a handle to the window or ERR_PTR() on error.
+ */
+extern struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
+   struct vas_rx_win_attr *attr);
+
 #endif /* _MISC_VAS_H */
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 0c178fe..04f6bb2 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -477,7 +477,7 @@ void vas_window_free(struct vas_window *window)
kfree(window);
 }
 
-struct vas_window *vas_window_alloc(struct vas_instance *vinst, int id)
+static struct vas_window *vas_window_alloc(struct vas_instance *vinst, int id)
 {
struct vas_window *window;
 
@@ -503,3 +503,206 @@ int vas_win_close(struct vas_window *window)
 {
return -1;
 }
+
+/*
+ * Look up the receive window registered for coprocessor type @cop on the
+ * VAS instance @vinst.
+ *
+ * If a window is found, its count of attached send windows (->num_txwins)
+ * is incremented while vinst->mutex is held. Returns the window, or NULL
+ * if none is registered for @cop.
+ */
+struct vas_window *get_vinstance_rxwin(struct vas_instance *vinst,
+           enum vas_cop_type cop)
+{
+   struct vas_window *rxwin;
+
+   mutex_lock(&vinst->mutex);
+
+   rxwin = vinst->rxwin[cop];
+   if (rxwin)
+       atomic_inc(&rxwin->num_txwins);
+
+   mutex_unlock(&vinst->mutex);
+
+   return rxwin;
+}
+
+/*
+ * Register @window as the receive window for coprocessor type @cop on the
+ * VAS instance @vinst, under vinst->mutex. Warns once if a receive window
+ * was already registered for @cop; the old entry is overwritten either way.
+ */
+static void set_vinstance_rxwin(struct vas_instance *vinst,
+           enum vas_cop_type cop, struct vas_window *window)
+{
+   mutex_lock(&vinst->mutex);
+
+   /*
+    * There should only be one receive window for a coprocessor type.
+    */
+   WARN_ON_ONCE(vinst->rxwin[cop]);
+   vinst->rxwin[cop] = window;
+
+   mutex_unlock(&vinst->mutex);
+}
+
+static void init_winctx_for_rxwin(struct vas_window *rxwin,
+   struct vas_rx_win_attr *rxattr,
+   struct vas_winctx *winctx)
+{
+   /*
+* We first zero (memset()) all fields and only set non-zero fields.
+* Following fields are 0/false but maybe deserve a comment:
+*
+*  ->user_win  No support for user Rx windows yet
+*  ->notify_os_intr_regIn powerNV, send intrs to HV
+*  ->notify_disableFalse for NX windows
+*  ->intr_disable  False for Fault Windows
+*  ->xtra_writeFalse for NX windows
+*  ->notify_early  NA for NX windows
+*  ->rsvd_txbuf_count  NA for Rx windows
+*  ->lpid, ->pid, ->tidNA for Rx windows
+*/
+
+   memset(winctx, 0, sizeof(struct vas_winctx));
+
+   winctx->rx_fifo = rxattr->rx_fifo;
+   winctx->rx_fifo_size = rxattr->rx_fifo_size;
+   winctx->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+   

[PATCH v4 09/11] VAS: Define vas_win_close() interface

2017-03-30 Thread Sukadev Bhattiprolu
Define the vas_win_close() interface which should be used to close a
send or receive window.

While the hardware configurations required to open send and receive windows
differ, the configuration to close a window is the same for both. So we use
a single interface to close the window.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog[v4]:
- Drop the poll for credits return (we can set the required credit,
  but cannot really find the available credit at a point in time)

Changelog[v3]:
- Fix order of parameters in GET_FIELD().
- Update references and sequence for closing/quiescing a window.
---
 arch/powerpc/include/asm/vas.h  |  7 +++
 arch/powerpc/platforms/powernv/vas-window.c | 96 +++--
 2 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index 09ea6f7..c923b8f 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -76,4 +76,11 @@ extern void vas_init_rx_win_attr(struct vas_rx_win_attr 
*rxattr,
 extern struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
struct vas_rx_win_attr *attr);
 
+/*
+ * Close the send or receive window identified by @win. For receive windows
+ * return -EAGAIN if there are active send windows attached to this receive
+ * window.
+ */
+int vas_win_close(struct vas_window *win);
+
 #endif /* _MISC_VAS_H */
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 04f6bb2..7b1a36c 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -470,7 +470,7 @@ int vas_assign_window_id(struct ida *ida)
return winid;
 }
 
-void vas_window_free(struct vas_window *window)
+static void vas_window_free(struct vas_window *window)
 {
unmap_wc_mmio_bars(window);
kfree(window->paste_addr_name);
@@ -498,10 +498,12 @@ static struct vas_window *vas_window_alloc(struct 
vas_instance *vinst, int id)
return NULL;
 }
 
-/* stub for now */
-int vas_win_close(struct vas_window *window)
+/*
+ * Drop the reference a send window holds on its receive window @rxwin by
+ * decrementing the receive window's count of attached send windows
+ * (->num_txwins). Warns once if @rxwin is marked as a send window.
+ */
+static void put_rx_win(struct vas_window *rxwin)
 {
-   return -1;
+   /* Better not be a send window! */
+   WARN_ON_ONCE(rxwin->tx_win);
+
+   atomic_dec(&rxwin->num_txwins);
 }
 
 struct vas_window *get_vinstance_rxwin(struct vas_instance *vinst,
@@ -706,3 +708,89 @@ struct vas_window *vas_rx_win_open(int vasid, enum 
vas_cop_type cop,
vas_release_window_id(>ida, rxwin->winid);
return ERR_PTR(rc);
 }
+
+/*
+ * Wait for the "Window Busy" flag of @window to clear.
+ *
+ * Re-reads the WIN_STATUS register until VAS_WIN_BUSY is clear, sleeping
+ * between reads. There is no upper bound on the number of retries.
+ */
+static void poll_window_busy_state(struct vas_window *window)
+{
+   uint64_t val;
+
+   for (;;) {
+       val = read_hvwc_reg(window, VREG(WIN_STATUS));
+       if (!GET_FIELD(VAS_WIN_BUSY, val))
+           break;
+
+       /*
+        * A bare schedule_timeout() does not sleep while the task
+        * state is TASK_RUNNING, which would turn this loop into a
+        * busy-wait. The _uninterruptible variant sets the task state
+        * first so that we really sleep between polls.
+        */
+       schedule_timeout_uninterruptible(2000);
+   }
+}
+
+/*
+ * Cast the window context of @window out of the cache.
+ *
+ * While WIN_CTX_CACHING_CTL reports VAS_WIN_CACHE_STATUS as set, write a
+ * cast-out request (VAS_CASTOUT_REQ = 1, VAS_PUSH_TO_MEM = 0) and sleep
+ * before checking again. There is no upper bound on the number of retries.
+ */
+static void poll_window_castout(struct vas_window *window)
+{
+   uint64_t val;
+
+   for (;;) {
+       val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL));
+       if (!GET_FIELD(VAS_WIN_CACHE_STATUS, val))
+           break;
+
+       val = 0ULL;
+       val = SET_FIELD(VAS_CASTOUT_REQ, val, 1);
+       val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0);
+       write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
+
+       /*
+        * A bare schedule_timeout() does not sleep while the task
+        * state is TASK_RUNNING; use the variant that sets the task
+        * state so that we really sleep between requests.
+        */
+       schedule_timeout_uninterruptible(2000);
+   }
+}
+
+/*
+ * Close a window.
+ *
+ * See Section 1.12.1 of VAS workbook v1.05 for details on closing window:
+ * - disable new paste operations (unmap paste address)
+ * - Poll for the "Window Busy" bit to be cleared
+ * - Clear the Open/Enable bit for the Window.
+ * - Poll for return of window Credits (implies FIFO empty for Rx win?)
+ * - Unpin and cast window context out of cache
+ *
+ * Besides the hardware, kernel has some bookkeeping of course.
+ */
+int vas_win_close(struct vas_window *window)
+{
+   uint64_t val;
+
+   if (!window)
+   return 0;
+
+   if (!window->tx_win && atomic_read(>num_txwins) != 0) {
+   pr_devel("VAS: Attempting to close an active Rx window!\n");
+   WARN_ON_ONCE(1);
+   return -EAGAIN;
+   }
+
+   unmap_wc_paste_kaddr(window);
+
+   poll_window_busy_state(window);
+
+   /* Unpin window from cache and close it */
+   val = read_hvwc_reg(window, VREG(WINCTL));
+   val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
+   val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
+   write_hvwc_reg(window, VREG(WINCTL), val);
+
+   poll_window_castout(window);
+
+   /* if send window, drop reference to matching receive window */
+   if (window->tx_win)
+   put_rx_win(window->rxwin);
+
+   vas_release_window_id(>vinst->ida, 

[PATCH v4 05/11] VAS: Define helpers for access MMIO regions

2017-03-30 Thread Sukadev Bhattiprolu
Define some helper functions to access the MMIO regions. We use these
in follow-on patches to read/write VAS hardware registers. These
helpers are also used to later issue 'paste' instructions to submit
requests to the NX hardware engines.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog [v3]:
- Minor reorg/cleanup of map/unmap functions

Changelog [v2]:
- Get HVWC, UWC and paste addresses from window->vinst (i.e DT)
  rather than kernel macros.
---
 arch/powerpc/platforms/powernv/vas-window.c | 126 
 1 file changed, 126 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 6156fbe..ec084d2 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -9,9 +9,135 @@
 
 #include 
 #include 
+#include 
+#include 
 
 #include "vas.h"
 
+/*
+ * Compute the paste address region for the window @window using the
+ * ->win_base_addr and ->win_id_shift we got from device tree.
+ *
+ * On return, *@addr holds the start of the region and *@len its length
+ * (PAGE_SIZE).
+ */
+void compute_paste_address(struct vas_window *window, uint64_t *addr, int *len)
+{
+   uint64_t base, shift;
+   int winid;
+
+   base = window->vinst->win_base_addr;
+   shift = window->vinst->win_id_shift;
+   winid = window->winid;
+
+   /*
+    * Widen winid before shifting: "winid << shift" would be evaluated
+    * in int and overflow once the result no longer fits in 32 bits.
+    */
+   *addr = base + ((uint64_t)winid << shift);
+   *len = PAGE_SIZE;
+
+   pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
+}
+
+/*
+ * Report the MMIO region of @window within the HVWC BAR: *@start is the
+ * BAR start plus winid * VAS_HVWC_SIZE, *@len is VAS_HVWC_SIZE.
+ */
+static inline void get_hvwc_mmio_bar(struct vas_window *window,
+           uint64_t *start, int *len)
+{
+   uint64_t bar_base = window->vinst->hvwc_bar_start;
+
+   *start = bar_base + window->winid * VAS_HVWC_SIZE;
+   *len = VAS_HVWC_SIZE;
+}
+
+/*
+ * Report the MMIO region of @window within the UWC BAR: *@start is the
+ * BAR start plus winid * VAS_UWC_SIZE, *@len is VAS_UWC_SIZE.
+ */
+static inline void get_uwc_mmio_bar(struct vas_window *window,
+           uint64_t *start, int *len)
+{
+   uint64_t bar_base = window->vinst->uwc_bar_start;
+
+   *start = bar_base + window->winid * VAS_UWC_SIZE;
+   *len = VAS_UWC_SIZE;
+}
+
+/*
+ * Reserve the region of @len bytes starting at @start under the name @name
+ * and map it into the kernel address space.
+ *
+ * Returns the mapped address, or NULL if either the reservation or the
+ * mapping fails. On failure nothing is left reserved.
+ */
+static void *map_mmio_region(char *name, uint64_t start, int len)
+{
+   void *map;
+
+   if (!request_mem_region(start, len, name)) {
+       pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+               __func__, start, len);
+       return NULL;
+   }
+
+   map = __ioremap(start, len, pgprot_val(pgprot_cached(__pgprot(0))));
+   if (!map) {
+       pr_devel("%s(): ioremap(0x%llx, %d) failed\n", __func__, start,
+               len);
+       /* Callers only release regions they hold a mapping for. */
+       release_mem_region(start, len);
+       return NULL;
+   }
+
+   return map;
+}
+
+/*
+ * Unmap the paste address mapping of @window, if there is one, and release
+ * the memory region that was reserved for it. ->paste_kaddr is reset to
+ * NULL so that a repeated call is a no-op.
+ */
+static void unmap_wc_paste_kaddr(struct vas_window *window)
+{
+   int len;
+   uint64_t busaddr_start;
+
+   if (window->paste_kaddr) {
+       iounmap(window->paste_kaddr);
+       compute_paste_address(window, &busaddr_start, &len);
+       release_mem_region((phys_addr_t)busaddr_start, len);
+       window->paste_kaddr = NULL;
+   }
+}
+
+/*
+ * Unmap every MMIO mapping held by @window (paste address, HVWC and UWC)
+ * and release the corresponding memory regions. Mappings that are NULL
+ * are skipped, and each pointer is reset to NULL once unmapped.
+ */
+static void unmap_wc_mmio_bars(struct vas_window *window)
+{
+   int len;
+   uint64_t busaddr_start;
+
+   unmap_wc_paste_kaddr(window);
+
+   if (window->hvwc_map) {
+       iounmap(window->hvwc_map);
+       get_hvwc_mmio_bar(window, &busaddr_start, &len);
+       release_mem_region((phys_addr_t)busaddr_start, len);
+       window->hvwc_map = NULL;
+   }
+
+   if (window->uwc_map) {
+       iounmap(window->uwc_map);
+       get_uwc_mmio_bar(window, &busaddr_start, &len);
+       release_mem_region((phys_addr_t)busaddr_start, len);
+       window->uwc_map = NULL;
+   }
+}
+
+/*
+ * Find the Hypervisor Window Context (HVWC) MMIO Base Address Region and the
+ * OS/User Window Context (UWC) MMIO Base Address Region for the given window.
+ * Map these bus addresses and save the mapped kernel addresses in @window.
+ *
+ * Returns 0 on success. If either mapping fails, whatever was mapped is
+ * unmapped again and -1 is returned.
+ */
+int map_wc_mmio_bars(struct vas_window *window)
+{
+   int len;
+   uint64_t start;
+
+   window->paste_kaddr = window->hvwc_map = window->uwc_map = NULL;
+
+   get_hvwc_mmio_bar(window, &start, &len);
+   window->hvwc_map = map_mmio_region("HVWCM_Window", start, len);
+
+   get_uwc_mmio_bar(window, &start, &len);
+   window->uwc_map = map_mmio_region("UWCM_Window", start, len);
+
+   if (!window->hvwc_map || !window->uwc_map) {
+       unmap_wc_mmio_bars(window);
+       return -1;
+   }
+
+   return 0;
+}
+
 /* stub for now */
 int vas_win_close(struct vas_window *window)
 {
-- 
2.7.4



[PATCH v4 11/11] VAS: Define copy/paste interfaces

2017-03-30 Thread Sukadev Bhattiprolu
Define interfaces (wrappers) to the 'copy' and 'paste' instructions
(which are new in PowerISA 3.0). These are intended to be used
by NX driver(s) to submit Coprocessor Request Blocks (CRBs) to the
NX hardware engines.

Signed-off-by: Sukadev Bhattiprolu 

---
Changelog[v3]
- Map raw CR value from paste instruction into an error code.
---
 arch/powerpc/include/asm/vas.h  | 13 +
 arch/powerpc/platforms/powernv/copy-paste.h | 74 +
 arch/powerpc/platforms/powernv/vas-window.c | 50 +++
 arch/powerpc/platforms/powernv/vas.h| 14 ++
 4 files changed, 151 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/copy-paste.h

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index 944bb4b..4e5a470 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -125,4 +125,17 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
  */
 int vas_win_close(struct vas_window *win);
 
+/*
+ * Copy the co-processor request block (CRB) @crb into the local L2 cache.
+ * For now, @offset must be 0 and @first must be true.
+ */
+extern int vas_copy_crb(void *crb, int offset, bool first);
+
+/*
+ * Paste a previously copied CRB (see vas_copy_crb()) from the L2 cache to
+ * the hardware address associated with the window @win. For now, @off must be
+ * 0 and @last must be true. @re is expected/assumed to be true for NX windows.
+ */
+extern int vas_paste_crb(struct vas_window *win, int off, bool last, bool re);
+
 #endif /* _MISC_VAS_H */
diff --git a/arch/powerpc/platforms/powernv/copy-paste.h 
b/arch/powerpc/platforms/powernv/copy-paste.h
new file mode 100644
index 000..7783bb8
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/copy-paste.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Macros taken from tools/testing/selftests/powerpc/context_switch/cp_abort.c
+ */
+#define PASTE(RA, RB, L, RC) \
+   .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) \
+ | (L) << (31-10) | (RC) << (31-31))
+
+#define COPY(RA, RB, L) \
+   .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) \
+ | (L) << (31-10))
+
+#define CR0_FXM"0x80"
+#define CR0_SHIFT  28
+#define CR0_MASK   0xF
+/*
+ * Copy/paste instructions:
+ *
+ * copy RA,RB,L
+ * Copy contents of address (RA) + effective_address(RB)
+ * to internal copy-buffer.
+ *
+ * L == 1 indicates this is the first copy.
+ *
+ * L == 0 indicates its a continuation of a prior first copy.
+ *
+ * paste RA,RB,L
+ * Paste contents of internal copy-buffer to the address
+ * (RA) + effective_address(RB)
+ *
+ * L == 0 indicates its a continuation of a prior paste. i.e.
+ * don't wait for the completion or update status.
+ *
+ * L == 1 indicates this is the last paste in the group (i.e.
+ * wait for the group to complete and update status in CR0).
+ *
+ * For Power9, the L bit must be 'true' in both copy and paste.
+ */
+
+/*
+ * Issue the 'copy' instruction encoded by COPY(), with @offset and @crb as
+ * the two register operands and the L field fixed at 1.
+ *
+ * @first is only checked (one-time warning if it is false); it does not
+ * change the instruction that is emitted.
+ *
+ * Always returns 0.
+ */
+static inline int vas_copy(void *crb, int offset, int first)
+{
+   WARN_ON_ONCE(!first);
+
+   __asm__ __volatile(stringify_in_c(COPY(%0, %1, %2))";"
+   :
+   : "b" (offset), "b" (crb), "i" (1)
+   : "memory");
+
+   return 0;
+}
+
+/*
+ * Issue the 'paste' instruction encoded by PASTE(), with @paste_address and
+ * @offset as the two register operands and both the L and RC fields fixed
+ * at 1, then read back the condition register field selected by CR0_FXM.
+ *
+ * @last is only checked (one-time warning if it is false); it does not
+ * change the instruction that is emitted.
+ *
+ * Returns the value read by mfocrf converted to int; no shifting or
+ * masking is applied here.
+ */
+static inline int vas_paste(void *paste_address, int offset, int last)
+{
+   unsigned long long cr;
+
+   WARN_ON_ONCE(!last);
+
+   cr = 0;
+   /*
+    * The paste updates its status in CR0, so CR0 must be listed as
+    * clobbered; otherwise the compiler may keep a live condition in
+    * CR0 across this asm statement.
+    */
+   __asm__ __volatile(stringify_in_c(PASTE(%1, %2, 1, 1))";"
+       "mfocrf %0," CR0_FXM ";"
+       : "=r" (cr)
+       : "b" (paste_address), "b" (offset)
+       : "memory", "cr0");
+
+   return cr;
+}
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 4f4c134..fe09d6f 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -14,6 +14,7 @@
 #include 
 
 #include "vas.h"
+#include "copy-paste.h"
 
 static int fault_winid;
 
@@ -866,6 +867,55 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
 
 }
 
+/*
+ * Exported wrapper around vas_copy(): returns -1 when VAS has not been
+ * initialized, otherwise forwards @crb, @offset and @first to vas_copy()
+ * and returns its result.
+ */
+int vas_copy_crb(void *crb, int offset, bool first)
+{
+   return vas_initialized() ? vas_copy(crb, offset, first) : -1;
+}
+
+#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
+int vas_paste_crb(struct vas_window *txwin, int offset, bool last, bool re)
+{
+   int rc;
+   uint64_t val;
+   void *addr;
+
+   if (!vas_initialized())
+   return -1;
+   /*
+   

[PATCH v4 10/11] VAS: Define vas_tx_win_open()

2017-03-30 Thread Sukadev Bhattiprolu
Define an interface to open a VAS send window. This interface is
intended to be used by the Nest Accelerator (NX) driver(s) to open
a send window and use it to submit compression/encryption requests
to a VAS receive window.

The receive window, identified by the [vasid, cop] parameters, must
already be open in VAS (i.e connected to an NX engine).

Signed-off-by: Sukadev Bhattiprolu 

---
Changelog [v3]:
- Distinguish between hardware PID (SPRN_PID) and Linux pid.
- Use macros rather than enum for threshold-control mode
- Set the pid of send window from attr (needed for user space
  send windows).
- Ignore irq port setting for now. They are needed for user space
  windows and will be added later
---
 arch/powerpc/include/asm/vas.h  |  42 
 arch/powerpc/platforms/powernv/vas-window.c | 157 
 2 files changed, 199 insertions(+)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index c923b8f..944bb4b 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -61,6 +61,29 @@ struct vas_rx_win_attr {
 };
 
 /*
+ * Window attributes specified by the in-kernel owner of a send window.
+ */
+struct vas_tx_win_attr {
+   enum vas_cop_type cop;
+   int wcreds_max;     /* 0 selects VAS_WCREDS_DEFAULT */
+   int lpid;
+   int pidr;   /* hardware PID (from SPRN_PID) */
+   int pid;/* linux process id */
+   int pswid;
+   int rsvd_txbuf_count;
+   int tc_mode;        /* threshold-control mode (macro values, not an enum) */
+
+   bool user_win;      /* copied into the window context's ->user_win */
+   bool pin_win;       /* copied into the window context's ->pin_win */
+   bool rej_no_credit;
+   bool rsvd_txbuf_enable;
+   bool tx_wcred_mode;
+   bool rx_wcred_mode;
+   bool tx_win_ord_mode;
+   bool rx_win_ord_mode;
+};
+
+/*
  * Helper to initialize receive window attributes to defaults for an
  * NX window.
  */
@@ -77,6 +100,25 @@ extern struct vas_window *vas_rx_win_open(int vasid, enum 
vas_cop_type cop,
struct vas_rx_win_attr *attr);
 
 /*
+ * Helper to initialize send window attributes to defaults for an NX window.
+ */
+extern void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr,
+   enum vas_cop_type cop);
+
+/*
+ * Open a VAS send window for the instance of VAS identified by @vasid
+ * and the co-processor type @cop. Use @attr to initialize attributes
+ * of the window.
+ *
+ * Note: The instance of VAS must already have an open receive window for
+ * the coprocessor type @cop.
+ *
+ * Return a handle to the send window or ERR_PTR() on error.
+ */
+struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
+   struct vas_tx_win_attr *attr);
+
+/*
  * Close the send or receive window identified by @win. For receive windows
  * return -EAGAIN if there are active send windows attached to this receive
  * window.
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 7b1a36c..4f4c134 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -709,6 +709,163 @@ struct vas_window *vas_rx_win_open(int vasid, enum 
vas_cop_type cop,
return ERR_PTR(rc);
 }
 
+/*
+ * Reset @txattr to all-zero and then, for the 842 coprocessor types
+ * (normal and high priority), enable the credit and ordering modes.
+ * Any other @cop leaves every field zero/false.
+ */
+void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr,
+           enum vas_cop_type cop)
+{
+   memset(txattr, 0, sizeof(*txattr));
+
+   switch (cop) {
+   case VAS_COP_TYPE_842:
+   case VAS_COP_TYPE_842_HIPRI:
+       txattr->rej_no_credit = false;
+       txattr->rx_wcred_mode = true;
+       txattr->tx_wcred_mode = true;
+       txattr->rx_win_ord_mode = true;
+       txattr->tx_win_ord_mode = true;
+       break;
+   default:
+       break;
+   }
+}
+
+static void init_winctx_for_txwin(struct vas_window *txwin,
+   struct vas_tx_win_attr *txattr,
+   struct vas_winctx *winctx)
+{
+   /*
+* We first zero all fields and only set non-zero ones. Following
+* are some fields set to 0/false for the stated reason:
+*
+*  ->notify_os_intr_regIn powerNV, send intrs to HV
+*  ->rsvd_txbuf_count  Not supported yet.
+*  ->notify_disableFalse for NX windows
+*  ->xtra_writeFalse for NX windows
+*  ->notify_early  NA for NX windows
+*  ->lnotify_lpid  NA for Tx windows
+*  ->lnotify_pid   NA for Tx windows
+*  ->lnotify_tid   NA for Tx windows
+*  ->tx_win_cred_mode  Ignore for now for NX windows
+*  ->rx_win_cred_mode  Ignore for now for NX windows
+*/
+   memset(winctx, 0, sizeof(struct vas_winctx));
+
+   winctx->wcreds_max = txattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+
+   winctx->user_win = txattr->user_win;
+   winctx->nx_win = txwin->rxwin->nx_win;
+   winctx->pin_win = txattr->pin_win;
+
+   winctx->rx_wcred_mode = txattr->rx_wcred_mode;
+   

[PATCH v4 05/11] VAS: Define helpers for access MMIO regions

2017-03-30 Thread Sukadev Bhattiprolu
Define some helper functions to access the MMIO regions. We use these
in follow-on patches to read/write VAS hardware registers. These
helpers are also used to later issue 'paste' instructions to submit
requests to the NX hardware engines.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog [v3]:
- Minor reorg/cleanup of map/unmap functions

Changelog [v2]:
- Get HVWC, UWC and paste addresses from window->vinst (i.e DT)
  rather than kernel macros.
---
 arch/powerpc/platforms/powernv/vas-window.c | 126 
 1 file changed, 126 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 6156fbe..ec084d2 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -9,9 +9,135 @@
 
 #include 
 #include 
+#include 
+#include 
 
 #include "vas.h"
 
+/*
+ * Compute the paste address region for the window @window using the
+ * ->win_base_addr and ->win_id_shift we got from device tree.
+ *
+ * On return, *@addr holds the start of the region and *@len its length
+ * (PAGE_SIZE).
+ */
+void compute_paste_address(struct vas_window *window, uint64_t *addr, int *len)
+{
+   uint64_t base, shift;
+   int winid;
+
+   base = window->vinst->win_base_addr;
+   shift = window->vinst->win_id_shift;
+   winid = window->winid;
+
+   /*
+    * Widen winid before shifting: "winid << shift" would be evaluated
+    * in int and overflow once the result no longer fits in 32 bits.
+    */
+   *addr = base + ((uint64_t)winid << shift);
+   *len = PAGE_SIZE;
+
+   pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
+}
+
+/*
+ * Report the MMIO region of @window within the HVWC BAR: *@start is the
+ * BAR start plus winid * VAS_HVWC_SIZE, *@len is VAS_HVWC_SIZE.
+ */
+static inline void get_hvwc_mmio_bar(struct vas_window *window,
+           uint64_t *start, int *len)
+{
+   uint64_t bar_base = window->vinst->hvwc_bar_start;
+
+   *start = bar_base + window->winid * VAS_HVWC_SIZE;
+   *len = VAS_HVWC_SIZE;
+}
+
+/*
+ * Report the MMIO region of @window within the UWC BAR: *@start is the
+ * BAR start plus winid * VAS_UWC_SIZE, *@len is VAS_UWC_SIZE.
+ */
+static inline void get_uwc_mmio_bar(struct vas_window *window,
+           uint64_t *start, int *len)
+{
+   uint64_t bar_base = window->vinst->uwc_bar_start;
+
+   *start = bar_base + window->winid * VAS_UWC_SIZE;
+   *len = VAS_UWC_SIZE;
+}
+
+/*
+ * Reserve the region of @len bytes starting at @start under the name @name
+ * and map it into the kernel address space.
+ *
+ * Returns the mapped address, or NULL if either the reservation or the
+ * mapping fails. On failure nothing is left reserved.
+ */
+static void *map_mmio_region(char *name, uint64_t start, int len)
+{
+   void *map;
+
+   if (!request_mem_region(start, len, name)) {
+       pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+               __func__, start, len);
+       return NULL;
+   }
+
+   map = __ioremap(start, len, pgprot_val(pgprot_cached(__pgprot(0))));
+   if (!map) {
+       pr_devel("%s(): ioremap(0x%llx, %d) failed\n", __func__, start,
+               len);
+       /* Callers only release regions they hold a mapping for. */
+       release_mem_region(start, len);
+       return NULL;
+   }
+
+   return map;
+}
+
+/*
+ * Unmap the paste address mapping of @window, if there is one, and release
+ * the memory region that was reserved for it. ->paste_kaddr is reset to
+ * NULL so that a repeated call is a no-op.
+ */
+static void unmap_wc_paste_kaddr(struct vas_window *window)
+{
+   int len;
+   uint64_t busaddr_start;
+
+   if (window->paste_kaddr) {
+       iounmap(window->paste_kaddr);
+       compute_paste_address(window, &busaddr_start, &len);
+       release_mem_region((phys_addr_t)busaddr_start, len);
+       window->paste_kaddr = NULL;
+   }
+}
+
+/*
+ * Unmap every MMIO mapping held by @window (paste address, HVWC and UWC)
+ * and release the corresponding memory regions. Mappings that are NULL
+ * are skipped, and each pointer is reset to NULL once unmapped.
+ */
+static void unmap_wc_mmio_bars(struct vas_window *window)
+{
+   int len;
+   uint64_t busaddr_start;
+
+   unmap_wc_paste_kaddr(window);
+
+   if (window->hvwc_map) {
+       iounmap(window->hvwc_map);
+       get_hvwc_mmio_bar(window, &busaddr_start, &len);
+       release_mem_region((phys_addr_t)busaddr_start, len);
+       window->hvwc_map = NULL;
+   }
+
+   if (window->uwc_map) {
+       iounmap(window->uwc_map);
+       get_uwc_mmio_bar(window, &busaddr_start, &len);
+       release_mem_region((phys_addr_t)busaddr_start, len);
+       window->uwc_map = NULL;
+   }
+}
+
+/*
+ * Find the Hypervisor Window Context (HVWC) MMIO Base Address Region and the
+ * OS/User Window Context (UWC) MMIO Base Address Region for the given window.
+ * Map these bus addresses and save the mapped kernel addresses in @window.
+ *
+ * Returns 0 on success. If either mapping fails, whatever was mapped is
+ * unmapped again and -1 is returned.
+ */
+int map_wc_mmio_bars(struct vas_window *window)
+{
+   int len;
+   uint64_t start;
+
+   window->paste_kaddr = window->hvwc_map = window->uwc_map = NULL;
+
+   get_hvwc_mmio_bar(window, &start, &len);
+   window->hvwc_map = map_mmio_region("HVWCM_Window", start, len);
+
+   get_uwc_mmio_bar(window, &start, &len);
+   window->uwc_map = map_mmio_region("UWCM_Window", start, len);
+
+   if (!window->hvwc_map || !window->uwc_map) {
+       unmap_wc_mmio_bars(window);
+       return -1;
+   }
+
+   return 0;
+}
+
 /* stub for now */
 int vas_win_close(struct vas_window *window)
 {
-- 
2.7.4



[PATCH v4 11/11] VAS: Define copy/paste interfaces

2017-03-30 Thread Sukadev Bhattiprolu
Define interfaces (wrappers) to the 'copy' and 'paste' instructions
(which are new in PowerISA 3.0). These are intended to be used
by NX driver(s) to submit Coprocessor Request Blocks (CRBs) to the
NX hardware engines.

Signed-off-by: Sukadev Bhattiprolu 

---
Changelog[v3]
- Map raw CR value from paste instruction into an error code.
---
 arch/powerpc/include/asm/vas.h  | 13 +
 arch/powerpc/platforms/powernv/copy-paste.h | 74 +
 arch/powerpc/platforms/powernv/vas-window.c | 50 +++
 arch/powerpc/platforms/powernv/vas.h| 14 ++
 4 files changed, 151 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/copy-paste.h

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index 944bb4b..4e5a470 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -125,4 +125,17 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
  */
 int vas_win_close(struct vas_window *win);
 
+/*
+ * Copy the co-processor request block (CRB) @crb into the local L2 cache.
+ * For now, @offset must be 0 and @first must be true.
+ */
+extern int vas_copy_crb(void *crb, int offset, bool first);
+
+/*
+ * Paste a previously copied CRB (see vas_copy_crb()) from the L2 cache to
+ * the hardware address associated with the window @win. For now, @off must be
+ * 0 and @last must be true. @re is expected/assumed to be true for NX windows.
+ */
+extern int vas_paste_crb(struct vas_window *win, int off, bool last, bool re);
+
 #endif /* _MISC_VAS_H */
diff --git a/arch/powerpc/platforms/powernv/copy-paste.h 
b/arch/powerpc/platforms/powernv/copy-paste.h
new file mode 100644
index 000..7783bb8
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/copy-paste.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Macros taken from tools/testing/selftests/powerpc/context_switch/cp_abort.c
+ */
+#define PASTE(RA, RB, L, RC) \
+   .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) \
+ | (L) << (31-10) | (RC) << (31-31))
+
+#define COPY(RA, RB, L) \
+   .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) \
+ | (L) << (31-10))
+
+#define CR0_FXM"0x80"
+#define CR0_SHIFT  28
+#define CR0_MASK   0xF
+/*
+ * Copy/paste instructions:
+ *
+ * copy RA,RB,L
+ * Copy contents of address (RA) + effective_address(RB)
+ * to internal copy-buffer.
+ *
+ * L == 1 indicates this is the first copy.
+ *
+ * L == 0 indicates it's a continuation of a prior first copy.
+ *
+ * paste RA,RB,L
+ * Paste contents of internal copy-buffer to the address
+ * (RA) + effective_address(RB)
+ *
+ * L == 0 indicates it's a continuation of a prior paste. i.e.
+ * don't wait for the completion or update status.
+ *
+ * L == 1 indicates this is the last paste in the group (i.e.
+ * wait for the group to complete and update status in CR0).
+ *
+ * For Power9, the L bit must be 'true' in both copy and paste.
+ */
+
+static inline int vas_copy(void *crb, int offset, int first)
+{
+   WARN_ON_ONCE(!first);
+
+   __asm__ __volatile(stringify_in_c(COPY(%0, %1, %2))";"
+   :
+   : "b" (offset), "b" (crb), "i" (1)
+   : "memory");
+
+   return 0;
+}
+
+static inline int vas_paste(void *paste_address, int offset, int last)
+{
+   unsigned long long cr;
+
+   WARN_ON_ONCE(!last);
+
+   cr = 0;
+   __asm__ __volatile(stringify_in_c(PASTE(%1, %2, 1, 1))";"
+   "mfocrf %0," CR0_FXM ";"
+   : "=r" (cr)
+   : "b" (paste_address), "b" (offset)
+   : "memory");
+
+   return cr;
+}
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 4f4c134..fe09d6f 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -14,6 +14,7 @@
 #include 
 
 #include "vas.h"
+#include "copy-paste.h"
 
 static int fault_winid;
 
@@ -866,6 +867,55 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
 
 }
 
+int vas_copy_crb(void *crb, int offset, bool first)
+{
+   if (!vas_initialized())
+   return -1;
+
+   return vas_copy(crb, offset, first);
+}
+
+#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
+int vas_paste_crb(struct vas_window *txwin, int offset, bool last, bool re)
+{
+   int rc;
+   uint64_t val;
+   void *addr;
+
+   if (!vas_initialized())
+   return -1;
+   /*
+* Only NX windows are 

[PATCH v4 10/11] VAS: Define vas_tx_win_open()

2017-03-30 Thread Sukadev Bhattiprolu
Define an interface to open a VAS send window. This interface is
intended to be used by the Nest Accelerator (NX) driver(s) to open
a send window and use it to submit compression/encryption requests
to a VAS receive window.

The receive window, identified by the [vasid, cop] parameters, must
already be open in VAS (i.e. connected to an NX engine).

Signed-off-by: Sukadev Bhattiprolu 

---
Changelog [v3]:
- Distinguish between hardware PID (SPRN_PID) and Linux pid.
- Use macros rather than enum for threshold-control mode
- Set the pid of send window from attr (needed for user space
  send windows).
- Ignore irq port setting for now. They are needed for user space
  windows and will be added later
---
 arch/powerpc/include/asm/vas.h  |  42 
 arch/powerpc/platforms/powernv/vas-window.c | 157 
 2 files changed, 199 insertions(+)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index c923b8f..944bb4b 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -61,6 +61,29 @@ struct vas_rx_win_attr {
 };
 
 /*
+ * Window attributes specified by the in-kernel owner of a send window.
+ */
+struct vas_tx_win_attr {
+   enum vas_cop_type cop;
+   int wcreds_max;
+   int lpid;
+   int pidr;   /* hardware PID (from SPRN_PID) */
+   int pid;/* linux process id */
+   int pswid;
+   int rsvd_txbuf_count;
+   int tc_mode;
+
+   bool user_win;
+   bool pin_win;
+   bool rej_no_credit;
+   bool rsvd_txbuf_enable;
+   bool tx_wcred_mode;
+   bool rx_wcred_mode;
+   bool tx_win_ord_mode;
+   bool rx_win_ord_mode;
+};
+
+/*
  * Helper to initialize receive window attributes to defaults for an
  * NX window.
  */
@@ -77,6 +100,25 @@ extern struct vas_window *vas_rx_win_open(int vasid, enum 
vas_cop_type cop,
struct vas_rx_win_attr *attr);
 
 /*
+ * Helper to initialize send window attributes to defaults for an NX window.
+ */
+extern void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr,
+   enum vas_cop_type cop);
+
+/*
+ * Open a VAS send window for the instance of VAS identified by @vasid
+ * and the co-processor type @cop. Use @attr to initialize attributes
+ * of the window.
+ *
+ * Note: The instance of VAS must already have an open receive window for
+ * the coprocessor type @cop.
+ *
+ * Return a handle to the send window or ERR_PTR() on error.
+ */
+struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
+   struct vas_tx_win_attr *attr);
+
+/*
  * Close the send or receive window identified by @win. For receive windows
  * return -EAGAIN if there are active send windows attached to this receive
  * window.
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 7b1a36c..4f4c134 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -709,6 +709,163 @@ struct vas_window *vas_rx_win_open(int vasid, enum 
vas_cop_type cop,
return ERR_PTR(rc);
 }
 
+void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type 
cop)
+{
+   memset(txattr, 0, sizeof(*txattr));
+
+   if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
+   txattr->rej_no_credit = false;
+   txattr->rx_wcred_mode = true;
+   txattr->tx_wcred_mode = true;
+   txattr->rx_win_ord_mode = true;
+   txattr->tx_win_ord_mode = true;
+   }
+}
+
+static void init_winctx_for_txwin(struct vas_window *txwin,
+   struct vas_tx_win_attr *txattr,
+   struct vas_winctx *winctx)
+{
+   /*
+* We first zero all fields and only set non-zero ones. Following
+* are some fields set to 0/false for the stated reason:
+*
+*  ->notify_os_intr_regIn powerNV, send intrs to HV
+*  ->rsvd_txbuf_count  Not supported yet.
+*  ->notify_disableFalse for NX windows
+*  ->xtra_writeFalse for NX windows
+*  ->notify_early  NA for NX windows
+*  ->lnotify_lpid  NA for Tx windows
+*  ->lnotify_pid   NA for Tx windows
+*  ->lnotify_tid   NA for Tx windows
+*  ->tx_win_cred_mode  Ignore for now for NX windows
+*  ->rx_win_cred_mode  Ignore for now for NX windows
+*/
+   memset(winctx, 0, sizeof(struct vas_winctx));
+
+   winctx->wcreds_max = txattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+
+   winctx->user_win = txattr->user_win;
+   winctx->nx_win = txwin->rxwin->nx_win;
+   winctx->pin_win = txattr->pin_win;
+
+   winctx->rx_wcred_mode = txattr->rx_wcred_mode;
+   winctx->tx_wcred_mode = 

[PATCH v4 04/11] VAS: Define vas_init() and vas_exit()

2017-03-30 Thread Sukadev Bhattiprolu
Implement vas_init() and vas_exit() functions for a new VAS module.
This VAS module is essentially a library for other device drivers
and kernel users of the NX coprocessors like NX-842 and NX-GZIP.
In the future this will be extended to add support for user space
to access the NX coprocessors.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog[v4]:
- [Michael Neuling] Fix some accidental deletions; fix help text
  in Kconfig; change vas_initialized to a function; move from
  drivers/misc to arch/powerpc/kernel
- Drop the vas_window_reset() interface. It is not needed as
  window will be initialized before each use.
Changelog[v3]:
- Zero vas_instances memory on allocation
- [Haren Myneni] Fix description in Kconfig
Changelog[v2]:
- Get HVWC, UWC and window address parameters from device tree.
---
 arch/powerpc/platforms/powernv/Kconfig  |  14 +++
 arch/powerpc/platforms/powernv/Makefile |   1 +
 arch/powerpc/platforms/powernv/vas-window.c |  19 
 arch/powerpc/platforms/powernv/vas.c| 145 
 arch/powerpc/platforms/powernv/vas.h|   3 +
 5 files changed, 182 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/vas-window.c
 create mode 100644 arch/powerpc/platforms/powernv/vas.c

diff --git a/arch/powerpc/platforms/powernv/Kconfig 
b/arch/powerpc/platforms/powernv/Kconfig
index 3a07e4d..54e2a4e 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -27,3 +27,17 @@ config OPAL_PRD
help
  This enables the opal-prd driver, a facility to run processor
  recovery diagnostics on OpenPower machines
+
+config VAS
+   tristate "IBM Virtual Accelerator Switchboard (VAS)"
+   depends on PPC_POWERNV
+   default n
+   help
+ This enables support for IBM Virtual Accelerator Switchboard (VAS).
+
+ VAS allows accelerators in co-processors like NX-GZIP and NX-842
+ to be accessible to kernel subsystems.
+
+ VAS adapters are found in POWER9 based systems.
+
+ If unsure, say N.
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb..ebef20b 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_PPC_SCOM)+= opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)   += opal-memory-errors.o
 obj-$(CONFIG_TRACEPOINTS)  += opal-tracepoints.o
 obj-$(CONFIG_OPAL_PRD) += opal-prd.o
+obj-$(CONFIG_VAS)  += vas.o vas-window.o
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
new file mode 100644
index 000..6156fbe
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+
+#include "vas.h"
+
+/* stub for now */
+int vas_win_close(struct vas_window *window)
+{
+   return -1;
+}
diff --git a/arch/powerpc/platforms/powernv/vas.c 
b/arch/powerpc/platforms/powernv/vas.c
new file mode 100644
index 000..9bf8f57
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vas.h"
+
+static bool init_done;
+int vas_num_instances;
+struct vas_instance *vas_instances;
+
+static int init_vas_instance(struct device_node *dn,
+   struct vas_instance *vinst)
+{
+   int rc;
+   const __be32 *p;
+   u64 values[6];
+
+   ida_init(&vinst->ida);
+   mutex_init(&vinst->mutex);
+
+   p = of_get_property(dn, "vas-id", NULL);
+   if (!p) {
+   pr_err("VAS: NULL vas-id? %p\n", p);
+   return -ENODEV;
+   }
+
+   vinst->vas_id = of_read_number(p, 1);
+
+   /*
+* Hardcoded 6 is tied to corresponding code in
+*  skiboot.git/core/vas.c
+*/
+   rc = of_property_read_variable_u64_array(dn, "reg", values, 6, 6);
+   if (rc != 6) {
+   pr_err("VAS %d: Unable to read reg properties, rc %d\n",
+   vinst->vas_id, rc);
+   return rc;
+   }
+
+   vinst->hvwc_bar_start = values[0];
+   vinst->hvwc_bar_len = values[1];
+   vinst->uwc_bar_start = values[2];
+   vinst->uwc_bar_len = values[3];
+  

[PATCH v4 02/11] VAS: Define macros, register fields and structures

2017-03-30 Thread Sukadev Bhattiprolu
Define macros for the VAS hardware registers and bit-fields as well
as a couple of data structures needed by the VAS driver.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog[v4]
- [Michael Neuling] Move VAS code to arch/powerpc; Reorg vas.h and
  vas-internal.h to kernel and uapi versions; rather than creating
  separate properties for window context/address entries in device
  tree, combine them into "reg" properties; drop ->hwirq and irq_port
  fields from vas_window as they are only needed with user space
  windows.

Changelog[v3]
- Rename winctx->pid to winctx->pidr to reflect that it's a value
  from the PID register (SPRN_PID), not the linux process id.
- Make it easier to split header into kernel/user parts
- To keep user interface simple, use macros rather than enum for
  the threshold-control modes.
- Add a pid field to struct vas_window - needed for user space
  send windows.

Changelog[v2]
- Add an overview of VAS in vas-internal.h
- Get window context parameters from device tree and drop
  unnecessary macros.
---
 arch/powerpc/include/asm/vas.h   |  34 +++
 arch/powerpc/include/uapi/asm/vas.h  |  25 +++
 arch/powerpc/platforms/powernv/vas.h | 387 +++
 3 files changed, 446 insertions(+)
 create mode 100644 arch/powerpc/include/asm/vas.h
 create mode 100644 arch/powerpc/include/uapi/asm/vas.h
 create mode 100644 arch/powerpc/platforms/powernv/vas.h

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
new file mode 100644
index 000..e2575d5
--- /dev/null
+++ b/arch/powerpc/include/asm/vas.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _MISC_VAS_H
+#define _MISC_VAS_H
+
+#include 
+
+/*
+ * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25
+ * (Local FIFO Size Register) of the VAS workbook.
+ */
+#define VAS_RX_FIFO_SIZE_MIN   (1 << 10)   /* 1KB */
+#define VAS_RX_FIFO_SIZE_MAX   (8 << 20)   /* 8MB */
+
+/*
+ * Co-processor Engine type.
+ */
+enum vas_cop_type {
+   VAS_COP_TYPE_FAULT,
+   VAS_COP_TYPE_842,
+   VAS_COP_TYPE_842_HIPRI,
+   VAS_COP_TYPE_GZIP,
+   VAS_COP_TYPE_GZIP_HIPRI,
+   VAS_COP_TYPE_MAX,
+};
+
+#endif /* _MISC_VAS_H */
diff --git a/arch/powerpc/include/uapi/asm/vas.h 
b/arch/powerpc/include/uapi/asm/vas.h
new file mode 100644
index 000..ddfe046
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/vas.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_MISC_VAS_H
+#define _UAPI_MISC_VAS_H
+
+/*
+ * Threshold Control Mode: Have paste operation fail if the number of
+ * requests in receive FIFO exceeds a threshold.
+ *
+ * NOTE: No special error code yet if paste is rejected because of these
+ *  limits. So users can't distinguish between this and other errors.
+ */
+#define VAS_THRESH_DISABLED0
+#define VAS_THRESH_FIFO_GT_HALF_FULL   1
+#define VAS_THRESH_FIFO_GT_QTR_FULL2
+#define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3
+
+#endif /* _UAPI_MISC_VAS_H */
diff --git a/arch/powerpc/platforms/powernv/vas.h 
b/arch/powerpc/platforms/powernv/vas.h
new file mode 100644
index 000..c63395d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -0,0 +1,387 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include 
+#include 
+#include 
+
+#ifdef CONFIG_PPC_4K_PAGES
+#  error "TODO: Compute RMA/Paste-address for 4K pages."
+#else
+#ifndef CONFIG_PPC_64K_PAGES
+#  error "Unexpected Page size."
+#endif
+#endif
+
+/*
+ * Overview of Virtual Accelerator Switchboard (VAS).
+ *
+ * VAS is a hardware "switchboard" that allows senders and receivers to
+ * exchange messages with _minimal_ kernel involvement. The receivers are
+ * typically NX coprocessor engines that perform compression or encryption
+ * in hardware, but receivers can also be other software threads.
+ *
+ * Senders are user/kernel threads that submit compression/encryption or
+ * other requests to the receivers. Senders must format their messages as
+ * Coprocessor Request Blocks (CRB)s and submit them 

[PATCH v4 04/11] VAS: Define vas_init() and vas_exit()

2017-03-30 Thread Sukadev Bhattiprolu
Implement vas_init() and vas_exit() functions for a new VAS module.
This VAS module is essentially a library for other device drivers
and kernel users of the NX coprocessors like NX-842 and NX-GZIP.
In the future this will be extended to add support for user space
to access the NX coprocessors.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog[v4]:
- [Michael Neuling] Fix some accidental deletions; fix help text
  in Kconfig; change vas_initialized to a function; move from
  drivers/misc to arch/powerpc/kernel
- Drop the vas_window_reset() interface. It is not needed as
  window will be initialized before each use.
Changelog[v3]:
- Zero vas_instances memory on allocation
- [Haren Myneni] Fix description in Kconfig
Changelog[v2]:
- Get HVWC, UWC and window address parameters from device tree.
---
 arch/powerpc/platforms/powernv/Kconfig  |  14 +++
 arch/powerpc/platforms/powernv/Makefile |   1 +
 arch/powerpc/platforms/powernv/vas-window.c |  19 
 arch/powerpc/platforms/powernv/vas.c| 145 
 arch/powerpc/platforms/powernv/vas.h|   3 +
 5 files changed, 182 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/vas-window.c
 create mode 100644 arch/powerpc/platforms/powernv/vas.c

diff --git a/arch/powerpc/platforms/powernv/Kconfig 
b/arch/powerpc/platforms/powernv/Kconfig
index 3a07e4d..54e2a4e 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -27,3 +27,17 @@ config OPAL_PRD
help
  This enables the opal-prd driver, a facility to run processor
  recovery diagnostics on OpenPower machines
+
+config VAS
+   tristate "IBM Virtual Accelerator Switchboard (VAS)"
+   depends on PPC_POWERNV
+   default n
+   help
+ This enables support for IBM Virtual Accelerator Switchboard (VAS).
+
+ VAS allows accelerators in co-processors like NX-GZIP and NX-842
+ to be accessible to kernel subsystems.
+
+ VAS adapters are found in POWER9 based systems.
+
+ If unsure, say N.
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb..ebef20b 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_PPC_SCOM)+= opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)   += opal-memory-errors.o
 obj-$(CONFIG_TRACEPOINTS)  += opal-tracepoints.o
 obj-$(CONFIG_OPAL_PRD) += opal-prd.o
+obj-$(CONFIG_VAS)  += vas.o vas-window.o
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
new file mode 100644
index 000..6156fbe
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+
+#include "vas.h"
+
+/* stub for now */
+int vas_win_close(struct vas_window *window)
+{
+   return -1;
+}
diff --git a/arch/powerpc/platforms/powernv/vas.c 
b/arch/powerpc/platforms/powernv/vas.c
new file mode 100644
index 000..9bf8f57
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vas.h"
+
+static bool init_done;
+int vas_num_instances;
+struct vas_instance *vas_instances;
+
+static int init_vas_instance(struct device_node *dn,
+   struct vas_instance *vinst)
+{
+   int rc;
+   const __be32 *p;
+   u64 values[6];
+
+   ida_init(&vinst->ida);
+   mutex_init(&vinst->mutex);
+
+   p = of_get_property(dn, "vas-id", NULL);
+   if (!p) {
+   pr_err("VAS: NULL vas-id? %p\n", p);
+   return -ENODEV;
+   }
+
+   vinst->vas_id = of_read_number(p, 1);
+
+   /*
+* Hardcoded 6 is tied to corresponding code in
+*  skiboot.git/core/vas.c
+*/
+   rc = of_property_read_variable_u64_array(dn, "reg", values, 6, 6);
+   if (rc != 6) {
+   pr_err("VAS %d: Unable to read reg properties, rc %d\n",
+   vinst->vas_id, rc);
+   return rc;
+   }
+
+   vinst->hvwc_bar_start = values[0];
+   vinst->hvwc_bar_len = values[1];
+   vinst->uwc_bar_start = values[2];
+   vinst->uwc_bar_len = values[3];
+   vinst->win_base_addr = 

[PATCH v4 02/11] VAS: Define macros, register fields and structures

2017-03-30 Thread Sukadev Bhattiprolu
Define macros for the VAS hardware registers and bit-fields as well
as a couple of data structures needed by the VAS driver.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog[v4]
- [Michael Neuling] Move VAS code to arch/powerpc; Reorg vas.h and
  vas-internal.h to kernel and uapi versions; rather than creating
  separate properties for window context/address entries in device
  tree, combine them into "reg" properties; drop ->hwirq and irq_port
  fields from vas_window as they are only needed with user space
  windows.

Changelog[v3]
- Rename winctx->pid to winctx->pidr to reflect that it's a value
  from the PID register (SPRN_PID), not the linux process id.
- Make it easier to split header into kernel/user parts
- To keep user interface simple, use macros rather than enum for
  the threshold-control modes.
- Add a pid field to struct vas_window - needed for user space
  send windows.

Changelog[v2]
- Add an overview of VAS in vas-internal.h
- Get window context parameters from device tree and drop
  unnecessary macros.
---
 arch/powerpc/include/asm/vas.h   |  34 +++
 arch/powerpc/include/uapi/asm/vas.h  |  25 +++
 arch/powerpc/platforms/powernv/vas.h | 387 +++
 3 files changed, 446 insertions(+)
 create mode 100644 arch/powerpc/include/asm/vas.h
 create mode 100644 arch/powerpc/include/uapi/asm/vas.h
 create mode 100644 arch/powerpc/platforms/powernv/vas.h

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
new file mode 100644
index 000..e2575d5
--- /dev/null
+++ b/arch/powerpc/include/asm/vas.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _MISC_VAS_H
+#define _MISC_VAS_H
+
+#include 
+
+/*
+ * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25
+ * (Local FIFO Size Register) of the VAS workbook.
+ */
+#define VAS_RX_FIFO_SIZE_MIN   (1 << 10)   /* 1KB */
+#define VAS_RX_FIFO_SIZE_MAX   (8 << 20)   /* 8MB */
+
+/*
+ * Co-processor Engine type.
+ */
+enum vas_cop_type {
+   VAS_COP_TYPE_FAULT,
+   VAS_COP_TYPE_842,
+   VAS_COP_TYPE_842_HIPRI,
+   VAS_COP_TYPE_GZIP,
+   VAS_COP_TYPE_GZIP_HIPRI,
+   VAS_COP_TYPE_MAX,
+};
+
+#endif /* _MISC_VAS_H */
diff --git a/arch/powerpc/include/uapi/asm/vas.h 
b/arch/powerpc/include/uapi/asm/vas.h
new file mode 100644
index 000..ddfe046
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/vas.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_MISC_VAS_H
+#define _UAPI_MISC_VAS_H
+
+/*
+ * Threshold Control Mode: Have paste operation fail if the number of
+ * requests in receive FIFO exceeds a threshold.
+ *
+ * NOTE: No special error code yet if paste is rejected because of these
+ *  limits. So users can't distinguish between this and other errors.
+ */
+#define VAS_THRESH_DISABLED0
+#define VAS_THRESH_FIFO_GT_HALF_FULL   1
+#define VAS_THRESH_FIFO_GT_QTR_FULL2
+#define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3
+
+#endif /* _UAPI_MISC_VAS_H */
diff --git a/arch/powerpc/platforms/powernv/vas.h 
b/arch/powerpc/platforms/powernv/vas.h
new file mode 100644
index 000..c63395d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -0,0 +1,387 @@
+/*
+ * Copyright 2016 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include 
+#include 
+#include 
+
+#ifdef CONFIG_PPC_4K_PAGES
+#  error "TODO: Compute RMA/Paste-address for 4K pages."
+#else
+#ifndef CONFIG_PPC_64K_PAGES
+#  error "Unexpected Page size."
+#endif
+#endif
+
+/*
+ * Overview of Virtual Accelerator Switchboard (VAS).
+ *
+ * VAS is a hardware "switchboard" that allows senders and receivers to
+ * exchange messages with _minimal_ kernel involvement. The receivers are
+ * typically NX coprocessor engines that perform compression or encryption
+ * in hardware, but receivers can also be other software threads.
+ *
+ * Senders are user/kernel threads that submit compression/encryption or
+ * other requests to the receivers. Senders must format their messages as
+ * Coprocessor Request Blocks (CRB)s and submit them using the "copy" and
+ * "paste" 

[lkp-robot] [staging] 5b6f9b95f7: kernel_BUG_at_drivers/base/driver.c

2017-03-30 Thread kernel test robot

FYI, we noticed the following commit:

commit: 5b6f9b95f7aeddf8a77a991bf1657a84ca281ab0 ("staging: unisys: visorbus: 
get rid of create_bus_type.")
https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master

in testcase: boot

on test machine: qemu-system-x86_64 -enable-kvm -cpu Westmere -m 512M

caused below changes (please refer to attached dmesg/kmsg for entire 
log/backtrace):


+--+++
|  | 362f87f312 | 5b6f9b95f7 |
+--+++
| boot_successes   | 8  | 0  |
| boot_failures| 0  | 8  |
| kernel_BUG_at_drivers/base/driver.c  | 0  | 8  |
| invalid_opcode:#[##] | 0  | 8  |
| Kernel_panic-not_syncing:Fatal_exception | 0  | 8  |
+--+++



[8.388559] kernel BUG at drivers/base/driver.c:153!
[8.390551] invalid opcode:  [#1] DEBUG_PAGEALLOC
[8.392425] CPU: 0 PID: 1 Comm: swapper Not tainted 
4.11.0-rc4-00802-g5b6f9b9 #1
[8.394507] task: 8818bb80 task.stack: 88184000
[8.396386] RIP: 0010:driver_register+0x23/0x116
[8.398106] RSP: :88187e30 EFLAGS: 00010296
[8.398845] RAX:  RBX: 848bc4c0 RCX: 8818bb80
[8.400886] RDX: 88001748cc38 RSI: 88001748b460 RDI: 848bc4c0
[8.402928] RBP: 88187e40 R08: 88001748d658 R09: 
[8.406008] R10: 88187c78 R11: 002dbf72 R12: 848bbf00
[8.430768] R13:  R14: 848f4792 R15: 0001
[8.431780] FS:  () GS:8482e000() 
knlGS:
[8.432925] CS:  0010 DS:  ES:  CR0: 80050033
[8.433746] CR2:  CR3: 1d015000 CR4: 06f0
[8.434871] Call Trace:
[8.435357]  visorbus_register_visor_driver+0x41/0x62
[8.436104]  visornic_init+0x9c/0xbf
[8.436677]  ? visornic_change_mtu+0x10/0x10
[8.437353]  do_one_initcall+0xa0/0x176
[8.437909]  ? do_early_param+0xc0/0xc0
[8.438560]  kernel_init_freeable+0x1a7/0x260
[8.439330]  ? rest_init+0xc9/0xc9
[8.439825]  kernel_init+0xf/0x138
[8.440389]  ? rest_init+0xc9/0xc9
[8.440939]  ret_from_fork+0x2c/0x40
[8.441455] Code: 4c 89 e0 5b 41 5c 5d c3 55 48 89 e5 41 54 53 48 89 fb e8 
36 9b d0 ff 4c 8b 63 08 49 83 bc 24 98 00 00 00 00 75 07 e8 22 9b d0 ff <0f> 0b 
e8 1b 9b d0 ff 49 83 7c 24 48 00 75 0f e8 0e 9b d0 ff 49 
[8.444605] RIP: driver_register+0x23/0x116 RSP: 88187e30
[8.445560] ---[ end trace 06c354500c4eab13 ]---


To reproduce:

git clone https://github.com/01org/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k  job-script  # job-script is attached in this 
email



Thanks,
Xiaolong
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86_64 4.11.0-rc4 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_INSTRUCTION_DECODER=y
CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_MMU=y
CONFIG_ARCH_MMAP_RND_BITS_MIN=28
CONFIG_ARCH_MMAP_RND_BITS_MAX=32
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
CONFIG_ZONE_DMA32=y
CONFIG_AUDIT_ARCH=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_PGTABLE_LEVELS=4
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_EXTABLE_SORT=y
CONFIG_THREAD_INFO_IN_TASK=y

#
# General setup
#
CONFIG_BROKEN_ON_SMP=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_CROSS_COMPILE=""
# CONFIG_COMPILE_TEST is not set
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_HAVE_KERNEL_LZ4=y
# CONFIG_KERNEL_GZIP is not set
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not set
# CONFIG_KERNEL_XZ is not set
# CONFIG_KERNEL_LZO is not set
CONFIG_KERNEL_LZ4=y
CONFIG_DEFAULT_HOSTNAME="(none)"
# 

[lkp-robot] [staging] 5b6f9b95f7: kernel_BUG_at_drivers/base/driver.c

2017-03-30 Thread kernel test robot

FYI, we noticed the following commit:

commit: 5b6f9b95f7aeddf8a77a991bf1657a84ca281ab0 ("staging: unisys: visorbus: 
get rid of create_bus_type.")
https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master

in testcase: boot

on test machine: qemu-system-x86_64 -enable-kvm -cpu Westmere -m 512M

caused below changes (please refer to attached dmesg/kmsg for entire 
log/backtrace):


+--+++
|  | 362f87f312 | 5b6f9b95f7 |
+--+++
| boot_successes   | 8  | 0  |
| boot_failures| 0  | 8  |
| kernel_BUG_at_drivers/base/driver.c  | 0  | 8  |
| invalid_opcode:#[##] | 0  | 8  |
| Kernel_panic-not_syncing:Fatal_exception | 0  | 8  |
+--+++



[8.388559] kernel BUG at drivers/base/driver.c:153!
[8.390551] invalid opcode:  [#1] DEBUG_PAGEALLOC
[8.392425] CPU: 0 PID: 1 Comm: swapper Not tainted 
4.11.0-rc4-00802-g5b6f9b9 #1
[8.394507] task: 8818bb80 task.stack: 88184000
[8.396386] RIP: 0010:driver_register+0x23/0x116
[8.398106] RSP: :88187e30 EFLAGS: 00010296
[8.398845] RAX:  RBX: 848bc4c0 RCX: 8818bb80
[8.400886] RDX: 88001748cc38 RSI: 88001748b460 RDI: 848bc4c0
[8.402928] RBP: 88187e40 R08: 88001748d658 R09: 
[8.406008] R10: 88187c78 R11: 002dbf72 R12: 848bbf00
[8.430768] R13:  R14: 848f4792 R15: 0001
[8.431780] FS:  () GS:8482e000() 
knlGS:
[8.432925] CS:  0010 DS:  ES:  CR0: 80050033
[8.433746] CR2:  CR3: 1d015000 CR4: 06f0
[8.434871] Call Trace:
[8.435357]  visorbus_register_visor_driver+0x41/0x62
[8.436104]  visornic_init+0x9c/0xbf
[8.436677]  ? visornic_change_mtu+0x10/0x10
[8.437353]  do_one_initcall+0xa0/0x176
[8.437909]  ? do_early_param+0xc0/0xc0
[8.438560]  kernel_init_freeable+0x1a7/0x260
[8.439330]  ? rest_init+0xc9/0xc9
[8.439825]  kernel_init+0xf/0x138
[8.440389]  ? rest_init+0xc9/0xc9
[8.440939]  ret_from_fork+0x2c/0x40
[8.441455] Code: 4c 89 e0 5b 41 5c 5d c3 55 48 89 e5 41 54 53 48 89 fb e8 
36 9b d0 ff 4c 8b 63 08 49 83 bc 24 98 00 00 00 00 75 07 e8 22 9b d0 ff <0f> 0b 
e8 1b 9b d0 ff 49 83 7c 24 48 00 75 0f e8 0e 9b d0 ff 49 
[8.444605] RIP: driver_register+0x23/0x116 RSP: 88187e30
[8.445560] ---[ end trace 06c354500c4eab13 ]---


To reproduce:

git clone https://github.com/01org/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k  job-script  # job-script is attached in this 
email



Thanks,
Xiaolong
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86_64 4.11.0-rc4 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_INSTRUCTION_DECODER=y
CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_MMU=y
CONFIG_ARCH_MMAP_RND_BITS_MIN=28
CONFIG_ARCH_MMAP_RND_BITS_MAX=32
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
CONFIG_ZONE_DMA32=y
CONFIG_AUDIT_ARCH=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_PGTABLE_LEVELS=4
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_EXTABLE_SORT=y
CONFIG_THREAD_INFO_IN_TASK=y

#
# General setup
#
CONFIG_BROKEN_ON_SMP=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_CROSS_COMPILE=""
# CONFIG_COMPILE_TEST is not set
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_HAVE_KERNEL_LZ4=y
# CONFIG_KERNEL_GZIP is not set
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not set
# CONFIG_KERNEL_XZ is not set
# CONFIG_KERNEL_LZO is not set
CONFIG_KERNEL_LZ4=y
CONFIG_DEFAULT_HOSTNAME="(none)"
# 

Re: [PATCH v3 14/37] mtd: nand: denali: support "nand-ecc-strength" DT property

2017-03-30 Thread Masahiro Yamada
Hi Boris,


2017-03-30 23:02 GMT+09:00 Boris Brezillon :
> On Thu, 30 Mar 2017 15:46:00 +0900
> Masahiro Yamada  wrote:
>
>> Historically, this driver tried to choose as big ECC strength as
>> possible, but it would be reasonable to allow DT to set a particular
>> ECC strength with "nand-ecc-strength" property.  This is useful
>> when a particular ECC setting is hard-coded by firmware (or hard-
>> wired by boot ROM).
>>
>> If no ECC strength is specified in DT, "nand-ecc-maximize" is implied
>> since this was the original behavior.
>
> You said there is currently no DT users,

Right.  No DT users ever in upstream.


> so how about changing the
> "fallback to ECC maximization" behavior for DT users, and instead of
> maximizing the ECC strength take the NAND requirements into account
> (chip->ecc_strength_ds).

This is difficult to judge in some cases.

As I said before, 4/512 and 8/1024 are not equivalent.

If chip's requirement  chip->ecc_step_ds matches
to the ecc->size supported by the controller,
this is easy.


If a chip requests 1024B, then the controller can only support 512B chunk
(or vice versa), it is difficult to simply compare
ecc strength.

Is it a bad thing if we use too strong ECC strength?

The disadvantage I see is we will have less OOB-free bytes,
but this will not be fatal, I guess.




-- 
Best Regards
Masahiro Yamada


Re: [PATCH v3 14/37] mtd: nand: denali: support "nand-ecc-strength" DT property

2017-03-30 Thread Masahiro Yamada
Hi Boris,


2017-03-30 23:02 GMT+09:00 Boris Brezillon :
> On Thu, 30 Mar 2017 15:46:00 +0900
> Masahiro Yamada  wrote:
>
>> Historically, this driver tried to choose as big ECC strength as
>> possible, but it would be reasonable to allow DT to set a particular
>> ECC strength with "nand-ecc-strength" property.  This is useful
>> when a particular ECC setting is hard-coded by firmware (or hard-
>> wired by boot ROM).
>>
>> If no ECC strength is specified in DT, "nand-ecc-maximize" is implied
>> since this was the original behavior.
>
> You said there is currently no DT users,

Right.  No DT users ever in upstream.


> so how about changing the
> "fallback to ECC maximization" behavior for DT users, and instead of
> maximizing the ECC strength take the NAND requirements into account
> (chip->ecc_strength_ds).

This is difficult to judge in some cases.

As I said before, 4/512 and 8/1024 are not equivalent.

If chip's requirement  chip->ecc_step_ds matches
to the ecc->size supported by the controller,
this is easy.


If a chip requests 1024B, then the controller can only support 512B chunk
(or vice versa), it is difficult to simply compare
ecc strength.

Is it a bad thing if we use too strong ECC strength?

The disadvantage I see is we will have less OOB-free bytes,
but this will not be fatal, I guess.




-- 
Best Regards
Masahiro Yamada


[lkp-robot] [PCI] d3881e5015: BUG:kernel_hang_in_boot_stage

2017-03-30 Thread kernel test robot

FYI, we noticed the following commit:

commit: d3881e5015421a578bc328136471fcf1d02ac389 ("PCI: Export PCI device 
config accessors")
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git pci/enumeration

in testcase: trinity
with following parameters:

runtime: 300s

test-description: Trinity is a linux system call fuzz tester.
test-url: http://codemonkey.org.uk/projects/trinity/


on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -m 512M

caused below changes (please refer to attached dmesg/kmsg for entire 
log/backtrace):


+-+---++
|   
  | v4.11-rc1 | d3881e5015 |
+-+---++
| boot_successes
  | 0 | 0  |
| boot_failures 
  | 16| 10 |
| is_trying_to_contend_lock(ww_class_mutex)at   
  | 5 ||
| calltrace:__ww_mutex_lock 
  | 5 ||
| BUG:kernel_hang_in_test_stage 
  | 1 ||
| invoked_oom-killer:gfp_mask=0x
  | 2 ||
| Mem-Info  
  | 2 ||
| Out_of_memory:Kill_process
  | 2 ||
| 
page_allocation_failure:order:#,mode:#(GFP_HIGHUSER_MOVABLE|__GFP_ZERO),nodemask=(null)
 | 2 ||
| BUG:soft_lockup-CPU##stuck_for#s  
  | 11||
| Kernel_panic-not_syncing:softlockup:hung_tasks
  | 11||
| BUG:kernel_hang_in_boot_stage 
  | 0 | 10 |
+-+---++



[0.00] BRK [0x0526, 0x05260fff] PGTABLE
[0.00] BRK [0x05261000, 0x05261fff] PGTABLE
[0.00] BRK [0x05262000, 0x05262fff] PGTABLE

Elapsed time: 510
BUG: kernel hang in boot stage

initrds=(
/osimage/yocto/yocto-minimal-x86_64-2016-04-22.cgz

/lkp/scheduled/vm-kbuild-yocto-x86_64-58/trinity-300s-yocto-minimal-x86_64-2016-04-22.cgz-d3881e5015421a578bc328136471fcf1d02ac389-20170331-22860-1h665bt-0.cgz
/lkp/lkp/lkp-x86_64.cgz


To reproduce:

git clone https://github.com/01org/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k  job-script  # job-script is attached in this 
email



Thanks,
Xiaolong
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86_64 4.11.0-rc1 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_INSTRUCTION_DECODER=y
CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_MMU=y
CONFIG_ARCH_MMAP_RND_BITS_MIN=28
CONFIG_ARCH_MMAP_RND_BITS_MAX=32
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
CONFIG_ZONE_DMA32=y
CONFIG_AUDIT_ARCH=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_KASAN_SHADOW_OFFSET=0xdc00
CONFIG_X86_64_SMP=y
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_PGTABLE_LEVELS=4
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_CONSTRUCTORS=y
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_EXTABLE_SORT=y
CONFIG_THREAD_INFO_IN_TASK=y

#
# General setup
#
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_CROSS_COMPILE=""
# CONFIG_COMPILE_TEST is not set
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_HAVE_KERNEL_LZ4=y
# CONFIG_KERNEL_GZIP is not set
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not 

[lkp-robot] [PCI] d3881e5015: BUG:kernel_hang_in_boot_stage

2017-03-30 Thread kernel test robot

FYI, we noticed the following commit:

commit: d3881e5015421a578bc328136471fcf1d02ac389 ("PCI: Export PCI device 
config accessors")
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git pci/enumeration

in testcase: trinity
with following parameters:

runtime: 300s

test-description: Trinity is a linux system call fuzz tester.
test-url: http://codemonkey.org.uk/projects/trinity/


on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -m 512M

caused below changes (please refer to attached dmesg/kmsg for entire 
log/backtrace):


+-+---++
|   
  | v4.11-rc1 | d3881e5015 |
+-+---++
| boot_successes
  | 0 | 0  |
| boot_failures 
  | 16| 10 |
| is_trying_to_contend_lock(ww_class_mutex)at   
  | 5 ||
| calltrace:__ww_mutex_lock 
  | 5 ||
| BUG:kernel_hang_in_test_stage 
  | 1 ||
| invoked_oom-killer:gfp_mask=0x
  | 2 ||
| Mem-Info  
  | 2 ||
| Out_of_memory:Kill_process
  | 2 ||
| 
page_allocation_failure:order:#,mode:#(GFP_HIGHUSER_MOVABLE|__GFP_ZERO),nodemask=(null)
 | 2 ||
| BUG:soft_lockup-CPU##stuck_for#s  
  | 11||
| Kernel_panic-not_syncing:softlockup:hung_tasks
  | 11||
| BUG:kernel_hang_in_boot_stage 
  | 0 | 10 |
+-+---++



[0.00] BRK [0x0526, 0x05260fff] PGTABLE
[0.00] BRK [0x05261000, 0x05261fff] PGTABLE
[0.00] BRK [0x05262000, 0x05262fff] PGTABLE

Elapsed time: 510
BUG: kernel hang in boot stage

initrds=(
/osimage/yocto/yocto-minimal-x86_64-2016-04-22.cgz

/lkp/scheduled/vm-kbuild-yocto-x86_64-58/trinity-300s-yocto-minimal-x86_64-2016-04-22.cgz-d3881e5015421a578bc328136471fcf1d02ac389-20170331-22860-1h665bt-0.cgz
/lkp/lkp/lkp-x86_64.cgz


To reproduce:

git clone https://github.com/01org/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k  job-script  # job-script is attached in this 
email



Thanks,
Xiaolong
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86_64 4.11.0-rc1 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_INSTRUCTION_DECODER=y
CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_MMU=y
CONFIG_ARCH_MMAP_RND_BITS_MIN=28
CONFIG_ARCH_MMAP_RND_BITS_MAX=32
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
CONFIG_ZONE_DMA32=y
CONFIG_AUDIT_ARCH=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_KASAN_SHADOW_OFFSET=0xdc00
CONFIG_X86_64_SMP=y
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_PGTABLE_LEVELS=4
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_CONSTRUCTORS=y
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_EXTABLE_SORT=y
CONFIG_THREAD_INFO_IN_TASK=y

#
# General setup
#
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_CROSS_COMPILE=""
# CONFIG_COMPILE_TEST is not set
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_HAVE_KERNEL_LZ4=y
# CONFIG_KERNEL_GZIP is not set
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not 

Re: [PATCH] x86/fpu: move FPU state into separate cache

2017-03-30 Thread kbuild test robot
Hi Kees,

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v4.11-rc4 next-20170330]
[cannot apply to tip/x86/core]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Kees-Cook/x86-fpu-move-FPU-state-into-separate-cache/20170331-110507
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: i386-randconfig-r0-201713 (attached as .config)
compiler: gcc-5 (Debian 5.4.1-2) 5.4.1 20160904
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All error/warnings (new ones prefixed by >>):

   arch/x86/math-emu/fpu_entry.c: In function 'valid_prefix':
   arch/x86/math-emu/fpu_system.h:50:24: error: request for member 'soft' in 
something not a structure or union
#define FPU_info  (I387->soft.info)
   ^
   include/linux/compiler.h:178:40: note: in definition of macro 'likely'
# define likely(x) __builtin_expect(!!(x), 1)
   ^
   arch/x86/include/asm/uaccess.h:100:10: note: in expansion of macro 
'__range_not_ok'
 likely(!__range_not_ok(addr, size, user_addr_max()));  \
 ^
   arch/x86/math-emu/fpu_system.h:80:36: note: in expansion of macro 'access_ok'
#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \
   ^
   arch/x86/math-emu/fpu_system.h:95:31: note: in expansion of macro 
'FPU_access_ok'
#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user 
*)FPU_EIP,z)
  ^
   arch/x86/math-emu/fpu_system.h:57:20: note: in expansion of macro 'FPU_info'
#define FPU_EIP   (FPU_info->regs->ip)
   ^
   arch/x86/math-emu/fpu_system.h:95:72: note: in expansion of macro 'FPU_EIP'
#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user 
*)FPU_EIP,z)
   ^
   arch/x86/math-emu/fpu_entry.c:558:2: note: in expansion of macro 
'FPU_code_access_ok'
 FPU_code_access_ok(1);
 ^
   In file included from arch/x86/math-emu/fpu_entry.c:35:0:
   arch/x86/math-emu/fpu_system.h:50:24: error: request for member 'soft' in 
something not a structure or union
#define FPU_info  (I387->soft.info)
   ^
   arch/x86/math-emu/fpu_system.h:81:16: note: in expansion of macro 'FPU_info'
math_abort(FPU_info,SIGSEGV)
   ^
   arch/x86/math-emu/fpu_system.h:95:31: note: in expansion of macro 
'FPU_access_ok'
#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user 
*)FPU_EIP,z)
  ^
   arch/x86/math-emu/fpu_entry.c:558:2: note: in expansion of macro 
'FPU_code_access_ok'
 FPU_code_access_ok(1);
 ^
   In file included from include/asm-generic/bug.h:4:0,
from arch/x86/include/asm/bug.h:35,
from include/linux/bug.h:4,
from include/linux/signal.h:4,
from arch/x86/math-emu/fpu_entry.c:27:
   arch/x86/math-emu/fpu_system.h:50:24: error: request for member 'soft' in 
something not a structure or union
#define FPU_info  (I387->soft.info)
   ^
   include/linux/compiler.h:178:40: note: in definition of macro 'likely'
# define likely(x) __builtin_expect(!!(x), 1)
   ^
   arch/x86/include/asm/uaccess.h:100:10: note: in expansion of macro 
'__range_not_ok'
 likely(!__range_not_ok(addr, size, user_addr_max()));  \
 ^
   arch/x86/math-emu/fpu_system.h:80:36: note: in expansion of macro 'access_ok'
#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \
   ^
   arch/x86/math-emu/fpu_system.h:95:31: note: in expansion of macro 
'FPU_access_ok'
#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user 
*)FPU_EIP,z)
  ^
   arch/x86/math-emu/fpu_system.h:57:20: note: in expansion of macro 'FPU_info'
#define FPU_EIP   (FPU_info->regs->ip)
   ^
   arch/x86/math-emu/fpu_system.h:95:72: note: in expansion of macro 'FPU_EIP'
#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user 
*)FPU_EIP,z)
   ^
   arch/x86/math-emu/fpu_entry.c:602:4: note: in expansion of macro 
'FPU_code_access_ok'
   FPU_code_access_ok(1);
   ^
   In file included from arch/x86/math-emu/fpu_entry.c:35:0:
   arch/x86/math-emu/fpu_system.h:50:24: error: request for member 'soft' in 
something not a structure or union
#define FPU_info  (I387->soft.info)
   ^
   arch/x86/math-emu/fpu_system.h:81:16: note: in expansion of macro 'FPU_info'
math_abort(FPU_info,SIGSEGV)
   ^
   arch/x86/mat

Re: [PATCH] x86/fpu: move FPU state into separate cache

2017-03-30 Thread kbuild test robot
Hi Kees,

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v4.11-rc4 next-20170330]
[cannot apply to tip/x86/core]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Kees-Cook/x86-fpu-move-FPU-state-into-separate-cache/20170331-110507
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: i386-randconfig-r0-201713 (attached as .config)
compiler: gcc-5 (Debian 5.4.1-2) 5.4.1 20160904
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All error/warnings (new ones prefixed by >>):

   arch/x86/math-emu/fpu_entry.c: In function 'valid_prefix':
   arch/x86/math-emu/fpu_system.h:50:24: error: request for member 'soft' in 
something not a structure or union
#define FPU_info  (I387->soft.info)
   ^
   include/linux/compiler.h:178:40: note: in definition of macro 'likely'
# define likely(x) __builtin_expect(!!(x), 1)
   ^
   arch/x86/include/asm/uaccess.h:100:10: note: in expansion of macro 
'__range_not_ok'
 likely(!__range_not_ok(addr, size, user_addr_max()));  \
 ^
   arch/x86/math-emu/fpu_system.h:80:36: note: in expansion of macro 'access_ok'
#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \
   ^
   arch/x86/math-emu/fpu_system.h:95:31: note: in expansion of macro 
'FPU_access_ok'
#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user 
*)FPU_EIP,z)
  ^
   arch/x86/math-emu/fpu_system.h:57:20: note: in expansion of macro 'FPU_info'
#define FPU_EIP   (FPU_info->regs->ip)
   ^
   arch/x86/math-emu/fpu_system.h:95:72: note: in expansion of macro 'FPU_EIP'
#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user 
*)FPU_EIP,z)
   ^
   arch/x86/math-emu/fpu_entry.c:558:2: note: in expansion of macro 
'FPU_code_access_ok'
 FPU_code_access_ok(1);
 ^
   In file included from arch/x86/math-emu/fpu_entry.c:35:0:
   arch/x86/math-emu/fpu_system.h:50:24: error: request for member 'soft' in 
something not a structure or union
#define FPU_info  (I387->soft.info)
   ^
   arch/x86/math-emu/fpu_system.h:81:16: note: in expansion of macro 'FPU_info'
math_abort(FPU_info,SIGSEGV)
   ^
   arch/x86/math-emu/fpu_system.h:95:31: note: in expansion of macro 
'FPU_access_ok'
#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user 
*)FPU_EIP,z)
  ^
   arch/x86/math-emu/fpu_entry.c:558:2: note: in expansion of macro 
'FPU_code_access_ok'
 FPU_code_access_ok(1);
 ^
   In file included from include/asm-generic/bug.h:4:0,
from arch/x86/include/asm/bug.h:35,
from include/linux/bug.h:4,
from include/linux/signal.h:4,
from arch/x86/math-emu/fpu_entry.c:27:
   arch/x86/math-emu/fpu_system.h:50:24: error: request for member 'soft' in 
something not a structure or union
#define FPU_info  (I387->soft.info)
   ^
   include/linux/compiler.h:178:40: note: in definition of macro 'likely'
# define likely(x) __builtin_expect(!!(x), 1)
   ^
   arch/x86/include/asm/uaccess.h:100:10: note: in expansion of macro 
'__range_not_ok'
 likely(!__range_not_ok(addr, size, user_addr_max()));  \
 ^
   arch/x86/math-emu/fpu_system.h:80:36: note: in expansion of macro 'access_ok'
#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \
   ^
   arch/x86/math-emu/fpu_system.h:95:31: note: in expansion of macro 
'FPU_access_ok'
#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user 
*)FPU_EIP,z)
  ^
   arch/x86/math-emu/fpu_system.h:57:20: note: in expansion of macro 'FPU_info'
#define FPU_EIP   (FPU_info->regs->ip)
   ^
   arch/x86/math-emu/fpu_system.h:95:72: note: in expansion of macro 'FPU_EIP'
#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user 
*)FPU_EIP,z)
   ^
   arch/x86/math-emu/fpu_entry.c:602:4: note: in expansion of macro 
'FPU_code_access_ok'
   FPU_code_access_ok(1);
   ^
   In file included from arch/x86/math-emu/fpu_entry.c:35:0:
   arch/x86/math-emu/fpu_system.h:50:24: error: request for member 'soft' in 
something not a structure or union
#define FPU_info  (I387->soft.info)
   ^
   arch/x86/math-emu/fpu_system.h:81:16: note: in expansion of macro 'FPU_info'
math_abort(FPU_info,SIGSEGV)
   ^
   arch/x86/mat

[PATCH RFC] staging: ks7010: remove custom Michael MIC implementation

2017-03-30 Thread Tobin C. Harding
ks7010 currently uses a custom implementation of the Michael MIC
algorithm. The kernel has an implementation of this algorithm
already, we should use it.

Remove the custom implementation. Implement helper functions that call
the in-tree implementation through the crypto API. Update the
makefile. Replace driver calls to the custom implementation with calls
to the newly defined helper functions.

Signed-off-by: Tobin C. Harding 
---
 drivers/staging/ks7010/Makefile  |   2 +-
 drivers/staging/ks7010/ks_hostif.c   |  48 +++-
 drivers/staging/ks7010/ks_wlan.h |   3 +
 drivers/staging/ks7010/mic.c | 131 +++
 drivers/staging/ks7010/mic.h |  22 ++
 drivers/staging/ks7010/michael_mic.c | 148 ---
 drivers/staging/ks7010/michael_mic.h |  25 --
 7 files changed, 186 insertions(+), 193 deletions(-)
 create mode 100644 drivers/staging/ks7010/mic.c
 create mode 100644 drivers/staging/ks7010/mic.h
 delete mode 100644 drivers/staging/ks7010/michael_mic.c
 delete mode 100644 drivers/staging/ks7010/michael_mic.h

diff --git a/drivers/staging/ks7010/Makefile b/drivers/staging/ks7010/Makefile
index 69fcf8d..195b570 100644
--- a/drivers/staging/ks7010/Makefile
+++ b/drivers/staging/ks7010/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_KS7010) += ks7010.o
 
 ccflags-y   += -DKS_WLAN_DEBUG=0
-ks7010-y:= michael_mic.o ks_hostif.o ks_wlan_net.o ks7010_sdio.o
+ks7010-y:= mic.o ks_hostif.o ks_wlan_net.o ks7010_sdio.o
diff --git a/drivers/staging/ks7010/ks_hostif.c 
b/drivers/staging/ks7010/ks_hostif.c
index da7c42e..68ecbf7 100644
--- a/drivers/staging/ks7010/ks_hostif.c
+++ b/drivers/staging/ks7010/ks_hostif.c
@@ -12,7 +12,7 @@
 #include "ks_wlan.h"
 #include "ks_hostif.h"
 #include "eap_packet.h"
-#include "michael_mic.h"
+#include "mic.h"
 
 #include 
 #include 
@@ -315,7 +315,6 @@ void hostif_data_indication(struct ks_wlan_private *priv)
unsigned short auth_type;
unsigned char temp[256];
 
-   unsigned char RecvMIC[8];
char buf[128];
struct ether_hdr *eth_hdr;
unsigned short eth_proto;
@@ -323,8 +322,8 @@ void hostif_data_indication(struct ks_wlan_private *priv)
struct mic_failure_t *mic_failure;
struct ieee802_1x_hdr *aa1x_hdr;
struct wpa_eapol_key *eap_key;
-   struct michel_mic_t michel_mic;
union iwreq_data wrqu;
+   struct wpa_key_t *key;
 
DPRINTK(3, "\n");
 
@@ -337,6 +336,7 @@ void hostif_data_indication(struct ks_wlan_private *priv)
 
auth_type = get_WORD(priv); /* AuthType */
get_WORD(priv); /* Reserve Area */
+   key = &priv->wpa.key[auth_type - 1];
 
eth_hdr = (struct ether_hdr *)(priv->rxp);
eth_proto = ntohs(eth_hdr->h_proto);
@@ -372,18 +372,25 @@ void hostif_data_indication(struct ks_wlan_private *priv)
 && priv->wpa.group_suite ==
 IW_AUTH_CIPHER_TKIP))
&& priv->wpa.key[auth_type - 1].key_len) {
+   u8 micrx[MICHAEL_MIC_LEN];
+   u8 mic[MICHAEL_MIC_LEN];
+   u8 *addr;
DPRINTK(4, "TKIP: protocol=%04X: size=%u\n",
eth_proto, priv->rx_size);
-   /* MIC save */
-   memcpy(&RecvMIC[0],
-  (priv->rxp) + ((priv->rx_size) - 8), 8);
-   priv->rx_size = priv->rx_size - 8;
+
+   addr = priv->rxp + priv->rx_size - 
MICHAEL_MIC_LEN;
+   memcpy(micrx, addr, MICHAEL_MIC_LEN);
+   priv->rx_size -= MICHAEL_MIC_LEN;
+
if (auth_type > 0 && auth_type < 4) {   /* 
auth_type check */
-   MichaelMICFunction(&michel_mic, 
(uint8_t *) priv->wpa.key[auth_type - 1].rx_mic_key, (uint8_t *) priv->rxp, 
(int)priv->rx_size, (uint8_t) 0,/* priority */
-  (uint8_t *)
-  michel_mic.Result);
+   u8 priority = 0;
+   ks_wlan_mic(priv->rx_tfm_mic,
+   key->rx_mic_key, priority,
+   priv->rxp, priv->rx_size,
+   mic);
}
-   if (memcmp(michel_mic.Result, RecvMIC, 8)) {
+
+   if (memcmp(mic, micrx, MICHAEL_MIC_LEN) != 0) {
now = jiffies;
mic_failure = &priv->wpa.mic_failure;
 

[PATCH RFC] remove custom Michael MIC implementation

2017-03-30 Thread Tobin C. Harding
This RFC can be applied on top of Linus' tree 89970a0

Configuration options needed to build are

CONFIG_STAGING=y
CONFIG_KS7010=m

The ks7010 driver currently uses a custom implementation of the
Michael MIC algorithm. The kernel has an implementation of this
algorithm already. This patch is an attempt to replace the custom
implementation with the in-tree implementation via the kernel
cryptography API.

I am not an expert on the Michael Message Integrity Check or on
cryptography in general. Actually, I'm not even an expert on kernel
development.

I believe I have mirrored the behavior of the custom implementation. I
do not know if I have done this completely correctly or, for that
matter, if I have gone about it correctly. The only in-tree driver I
could find that does the MIC check in software was the Orinoco driver
(net/wireless/intersil/orinoco). I based this code off of the Orinoco
code and the current implementation.

The whole thing is in one patch since there was no way to keep the
driver in a sane state during the implementation replacement.

The steps I took were as follows;

1. Remove the custom implementation, michael_mic.[ch]

2. Implement helper functions that call the kernel crypto API, this is
   the code that is based on Orinoco, mic.[ch]

3. Replace driver calls to the custom implementation with calls to the
   newly defined helper functions.


The code is untested, I have hardware in the mail.

If any one is interested and has any comments I would really like to
hear them. I am open to all suggestions (even down to trivial coding
style issues).

Thank you for taking the time to read this and for any tips you may be
able to give me.

thanks,
Tobin.

Tobin C. Harding (1):
  staging: ks7010: remove custom Michael MIC implementation

 drivers/staging/ks7010/Makefile  |   2 +-
 drivers/staging/ks7010/ks_hostif.c   |  48 +++-
 drivers/staging/ks7010/ks_wlan.h |   3 +
 drivers/staging/ks7010/mic.c | 131 +++
 drivers/staging/ks7010/mic.h |  22 ++
 drivers/staging/ks7010/michael_mic.c | 148 ---
 drivers/staging/ks7010/michael_mic.h |  25 --
 7 files changed, 186 insertions(+), 193 deletions(-)
 create mode 100644 drivers/staging/ks7010/mic.c
 create mode 100644 drivers/staging/ks7010/mic.h
 delete mode 100644 drivers/staging/ks7010/michael_mic.c
 delete mode 100644 drivers/staging/ks7010/michael_mic.h

-- 
2.7.4



[PATCH RFC] staging: ks7010: remove custom Michael MIC implementation

2017-03-30 Thread Tobin C. Harding
ks7010 currently uses a custom implementation of the Michael MIC
algorithm. The kernel has an implementation of this algorithm
already, we should use it.

Remove the custom implementation. Implement helper functions that call
the in-tree implementation through the crypto API. Update the
makefile. Replace driver calls to the custom implementation with calls
to the newly defined helper functions.

Signed-off-by: Tobin C. Harding 
---
 drivers/staging/ks7010/Makefile  |   2 +-
 drivers/staging/ks7010/ks_hostif.c   |  48 +++-
 drivers/staging/ks7010/ks_wlan.h |   3 +
 drivers/staging/ks7010/mic.c | 131 +++
 drivers/staging/ks7010/mic.h |  22 ++
 drivers/staging/ks7010/michael_mic.c | 148 ---
 drivers/staging/ks7010/michael_mic.h |  25 --
 7 files changed, 186 insertions(+), 193 deletions(-)
 create mode 100644 drivers/staging/ks7010/mic.c
 create mode 100644 drivers/staging/ks7010/mic.h
 delete mode 100644 drivers/staging/ks7010/michael_mic.c
 delete mode 100644 drivers/staging/ks7010/michael_mic.h

diff --git a/drivers/staging/ks7010/Makefile b/drivers/staging/ks7010/Makefile
index 69fcf8d..195b570 100644
--- a/drivers/staging/ks7010/Makefile
+++ b/drivers/staging/ks7010/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_KS7010) += ks7010.o
 
 ccflags-y   += -DKS_WLAN_DEBUG=0
-ks7010-y:= michael_mic.o ks_hostif.o ks_wlan_net.o ks7010_sdio.o
+ks7010-y:= mic.o ks_hostif.o ks_wlan_net.o ks7010_sdio.o
diff --git a/drivers/staging/ks7010/ks_hostif.c 
b/drivers/staging/ks7010/ks_hostif.c
index da7c42e..68ecbf7 100644
--- a/drivers/staging/ks7010/ks_hostif.c
+++ b/drivers/staging/ks7010/ks_hostif.c
@@ -12,7 +12,7 @@
 #include "ks_wlan.h"
 #include "ks_hostif.h"
 #include "eap_packet.h"
-#include "michael_mic.h"
+#include "mic.h"
 
 #include 
 #include 
@@ -315,7 +315,6 @@ void hostif_data_indication(struct ks_wlan_private *priv)
unsigned short auth_type;
unsigned char temp[256];
 
-   unsigned char RecvMIC[8];
char buf[128];
struct ether_hdr *eth_hdr;
unsigned short eth_proto;
@@ -323,8 +322,8 @@ void hostif_data_indication(struct ks_wlan_private *priv)
struct mic_failure_t *mic_failure;
struct ieee802_1x_hdr *aa1x_hdr;
struct wpa_eapol_key *eap_key;
-   struct michel_mic_t michel_mic;
union iwreq_data wrqu;
+   struct wpa_key_t *key;
 
DPRINTK(3, "\n");
 
@@ -337,6 +336,7 @@ void hostif_data_indication(struct ks_wlan_private *priv)
 
auth_type = get_WORD(priv); /* AuthType */
get_WORD(priv); /* Reserve Area */
+   key = &priv->wpa.key[auth_type - 1];
 
eth_hdr = (struct ether_hdr *)(priv->rxp);
eth_proto = ntohs(eth_hdr->h_proto);
@@ -372,18 +372,25 @@ void hostif_data_indication(struct ks_wlan_private *priv)
 && priv->wpa.group_suite ==
 IW_AUTH_CIPHER_TKIP))
&& priv->wpa.key[auth_type - 1].key_len) {
+   u8 micrx[MICHAEL_MIC_LEN];
+   u8 mic[MICHAEL_MIC_LEN];
+   u8 *addr;
DPRINTK(4, "TKIP: protocol=%04X: size=%u\n",
eth_proto, priv->rx_size);
-   /* MIC save */
-   memcpy(&RecvMIC[0],
-  (priv->rxp) + ((priv->rx_size) - 8), 8);
-   priv->rx_size = priv->rx_size - 8;
+
+   addr = priv->rxp + priv->rx_size - 
MICHAEL_MIC_LEN;
+   memcpy(micrx, addr, MICHAEL_MIC_LEN);
+   priv->rx_size -= MICHAEL_MIC_LEN;
+
if (auth_type > 0 && auth_type < 4) {   /* 
auth_type check */
-   MichaelMICFunction(&michel_mic, 
(uint8_t *) priv->wpa.key[auth_type - 1].rx_mic_key, (uint8_t *) priv->rxp, 
(int)priv->rx_size, (uint8_t) 0,/* priority */
-  (uint8_t *)
-  michel_mic.Result);
+   u8 priority = 0;
+   ks_wlan_mic(priv->rx_tfm_mic,
+   key->rx_mic_key, priority,
+   priv->rxp, priv->rx_size,
+   mic);
}
-   if (memcmp(michel_mic.Result, RecvMIC, 8)) {
+
+   if (memcmp(mic, micrx, MICHAEL_MIC_LEN) != 0) {
now = jiffies;
mic_failure = &priv->wpa.mic_failure;
/* MIC 

[PATCH RFC] remove custom Michael MIC implementation

2017-03-30 Thread Tobin C. Harding
This RFC can be applied on top of Linus' tree 89970a0

Configuration options needed to build are

CONFIG_STAGING=y
CONFIG_KS7010=m

The ks7010 driver currently uses a custom implementation of the
Michael MIC algorithm. The kernel has an implementation of this
algorithm already. This patch is an attempt to replace the custom
implementation with the in-tree implementation via the kernel
cryptography API.

I am not an expert on the Michael Message Integrity Check or on
cryptography in general. Actually, I'm not even an expert on kernel
development.

I believe I have mirrored the behavior of the custom implementation. I
do not know if I have done this completely correctly or, for that
matter, if I have gone about it correctly. The only in-tree driver I
could find that does the MIC check in software was the Orinoco driver
(net/wireless/intersil/orinoco). I based this code off of the Orinoco
code and the current implementation.

The whole thing is in one patch since there was no way to keep the
driver in a sane state during the implementation replacement.

The steps I took were as follows:

1. Remove the custom implementation, michael_mic.[ch]

2. Implement helper functions that call the kernel crypto API; this is
   the code that is based on Orinoco, mic.[ch]

3. Replace driver calls to the custom implementation with calls to the
   newly defined helper functions.


The code is untested, I have hardware in the mail.

If anyone is interested and has any comments I would really like to
hear them. I am open to all suggestions (even down to trivial coding
style issues).

Thank you for taking the time to read this and for any tips you may be
able to give me.

thanks,
Tobin.

Tobin C. Harding (1):
  staging: ks7010: remove custom Michael MIC implementation

 drivers/staging/ks7010/Makefile  |   2 +-
 drivers/staging/ks7010/ks_hostif.c   |  48 +++-
 drivers/staging/ks7010/ks_wlan.h |   3 +
 drivers/staging/ks7010/mic.c | 131 +++
 drivers/staging/ks7010/mic.h |  22 ++
 drivers/staging/ks7010/michael_mic.c | 148 ---
 drivers/staging/ks7010/michael_mic.h |  25 --
 7 files changed, 186 insertions(+), 193 deletions(-)
 create mode 100644 drivers/staging/ks7010/mic.c
 create mode 100644 drivers/staging/ks7010/mic.h
 delete mode 100644 drivers/staging/ks7010/michael_mic.c
 delete mode 100644 drivers/staging/ks7010/michael_mic.h

-- 
2.7.4



Re: [PATCH v4 0/7] PCI: dwc: Miscellaneous fixes and cleanups

2017-03-30 Thread Kishon Vijay Abraham I


On Friday 31 March 2017 05:14 AM, Bjorn Helgaas wrote:
> On Mon, Mar 13, 2017 at 07:13:21PM +0530, Kishon Vijay Abraham I wrote:
>> This should be the final set of cleanups/fixes before endpoint
>> support can be merged.
>>
>> Keerthy's patch is a general fix in dra7xx driver and is not
>> directly related to endpoint mode.
>>
>> This v1 of this series was previously sent with a different
>> cover letter $subject [1]
>>
>> Changes from v3:
>> *) instead of changing all the callers of dbi accessors (taking
>>dbi_base and size), manage it using static inline as suggested
>>by Niklas (used static inline instead of macros because of
>>checkpatch error).
>>
>> Changes from v2:
>> *) Kconfig changes that were spilled into a patch are removed.
>> *) In addition to renaming _unroll() to _ob_unroll(), all the
>>_unroll configurations are also moved to a separate function.
>>
>> Changes from v1:
>> *) included a patch to rename _unroll() to _ob_unroll() as
>>similar thing has to be done for inbound window in the case
>>of EP mode.
>> *) used 'size_t' instead of 'int' for specifying the size
>>in read_dbi/write_dbi function arguments.
>> *) Populate cpu_addr_fixup ops for artpec6 as suggested by
>>Niklas
>>
>> This series is based on 4.11-rc1
>>
>> [1] -> https://lkml.org/lkml/2017/2/16/270
>>
>> Keerthy (1):
>>   PCI: dwc: dra7xx: Push request_irq call to the bottom of probe
>>
>> Kishon Vijay Abraham I (6):
>>   PCI: dwc: designware: Add new *ops* for cpu addr fixup
>>   PCI: dwc: dra7xx: Populate cpu_addr_fixup ops
>>   PCI: dwc: artpec6: Populate cpu_addr_fixup ops
>>   PCI: dwc: all: Modify dbi accessors to take dbi_base as argument
>>   PCI: dwc: all: Modify dbi accessors to access data of  4/2/1 bytes
>>   PCI: dwc: designware: Move _unroll configurations to a separate
>> function
>>
>>  drivers/pci/dwc/pci-dra7xx.c  |  25 +++
>>  drivers/pci/dwc/pci-exynos.c  |  14 ++--
>>  drivers/pci/dwc/pcie-artpec6.c|  15 +++--
>>  drivers/pci/dwc/pcie-designware.c | 133 
>> --
>>  drivers/pci/dwc/pcie-designware.h |  23 +--
>>  5 files changed, 135 insertions(+), 75 deletions(-)
> 
> Applied to pci/host-designware for v4.12, with Niklas' ack on patches
> 4 & 5, thanks!

Thanks Bjorn.

-Kishon


Re: [PATCH v4 0/7] PCI: dwc: Miscellaneous fixes and cleanups

2017-03-30 Thread Kishon Vijay Abraham I


On Friday 31 March 2017 05:14 AM, Bjorn Helgaas wrote:
> On Mon, Mar 13, 2017 at 07:13:21PM +0530, Kishon Vijay Abraham I wrote:
>> This should be the final set of cleanups/fixes before endpoint
>> support can be merged.
>>
>> Keerthy's patch is a general fix in dra7xx driver and is not
>> directly related to endpoint mode.
>>
>> This v1 of this series was previously sent with a different
>> cover letter $subject [1]
>>
>> Changes from v3:
>> *) instead of changing all the callers of dbi accessors (taking
>>dbi_base and size), manage it using static inline as suggested
>>by Niklas (used static inline instead of macros because of
>>checkpatch error).
>>
>> Changes from v2:
>> *) Kconfig changes that were spilled into a patch are removed.
>> *) In addition to renaming _unroll() to _ob_unroll(), all the
>>_unroll configurations are also moved to a separate function.
>>
>> Changes from v1:
>> *) included a patch to rename _unroll() to _ob_unroll() as
>>similar thing has to be done for inbound window in the case
>>of EP mode.
>> *) used 'size_t' instead of 'int' for specifying the size
>>in read_dbi/write_dbi function arguments.
>> *) Populate cpu_addr_fixup ops for artpec6 as suggested by
>>Niklas
>>
>> This series is based on 4.11-rc1
>>
>> [1] -> https://lkml.org/lkml/2017/2/16/270
>>
>> Keerthy (1):
>>   PCI: dwc: dra7xx: Push request_irq call to the bottom of probe
>>
>> Kishon Vijay Abraham I (6):
>>   PCI: dwc: designware: Add new *ops* for cpu addr fixup
>>   PCI: dwc: dra7xx: Populate cpu_addr_fixup ops
>>   PCI: dwc: artpec6: Populate cpu_addr_fixup ops
>>   PCI: dwc: all: Modify dbi accessors to take dbi_base as argument
>>   PCI: dwc: all: Modify dbi accessors to access data of  4/2/1 bytes
>>   PCI: dwc: designware: Move _unroll configurations to a separate
>> function
>>
>>  drivers/pci/dwc/pci-dra7xx.c  |  25 +++
>>  drivers/pci/dwc/pci-exynos.c  |  14 ++--
>>  drivers/pci/dwc/pcie-artpec6.c|  15 +++--
>>  drivers/pci/dwc/pcie-designware.c | 133 
>> --
>>  drivers/pci/dwc/pcie-designware.h |  23 +--
>>  5 files changed, 135 insertions(+), 75 deletions(-)
> 
> Applied to pci/host-designware for v4.12, with Niklas' ack on patches
> 4 & 5, thanks!

Thanks Bjorn.

-Kishon


[PATCH] arm64: dts: uniphier: add input-delay properties to Cadence eMMC node

2017-03-30 Thread Masahiro Yamada
Since commit a04e2b383401 ("mmc: sdhci-cadence: Update PHY delay
configuration"), PHY parameters must be specified by DT.

The hard-coded settings have been converted as follows:
- SDHCI_CDNS_PHY_DLY_SD_DEFAULT -> cdns,phy-input-delay-legacy
- SDHCI_CDNS_PHY_DLY_EMMC_SDR   -> cdns,phy-input-delay-mmc-highspeed
- SDHCI_CDNS_PHY_DLY_EMMC_DDR   -> cdns,phy-input-delay-mmc-ddr

The following have not been moved:
- SDHCI_CDNS_PHY_DLY_SD_HS
   this is unneeded in the eMMC configuration
- SDHCI_CDNS_PHY_DLY_EMMC_LEGACY
   this is never enabled by the driver as it is covered by
   SDHCI_CDNS_PHY_DLY_SD_DEFAULT

Signed-off-by: Masahiro Yamada 
---

 arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi | 3 +++
 arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi 
b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
index 5dc5124..b6ebdc9 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
@@ -310,6 +310,9 @@
bus-width = <8>;
mmc-ddr-1_8v;
mmc-hs200-1_8v;
+   cdns,phy-input-delay-legacy = <4>;
+   cdns,phy-input-delay-mmc-highspeed = <2>;
+   cdns,phy-input-delay-mmc-ddr = <3>;
};
 
usb0: usb@5a800100 {
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi 
b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
index 6c9a72d..0ab6c2e 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
@@ -394,6 +394,9 @@
bus-width = <8>;
mmc-ddr-1_8v;
mmc-hs200-1_8v;
+   cdns,phy-input-delay-legacy = <4>;
+   cdns,phy-input-delay-mmc-highspeed = <2>;
+   cdns,phy-input-delay-mmc-ddr = <3>;
};
 
sd: sdhc@5a40 {
-- 
2.7.4



[PATCH] arm64: dts: uniphier: add input-delay properties to Cadence eMMC node

2017-03-30 Thread Masahiro Yamada
Since commit a04e2b383401 ("mmc: sdhci-cadence: Update PHY delay
configuration"), PHY parameters must be specified by DT.

The hard-coded settings have been converted as follows:
- SDHCI_CDNS_PHY_DLY_SD_DEFAULT -> cdns,phy-input-delay-legacy
- SDHCI_CDNS_PHY_DLY_EMMC_SDR   -> cdns,phy-input-delay-mmc-highspeed
- SDHCI_CDNS_PHY_DLY_EMMC_DDR   -> cdns,phy-input-delay-mmc-ddr

The following have not been moved:
- SDHCI_CDNS_PHY_DLY_SD_HS
   this is unneeded in the eMMC configuration
- SDHCI_CDNS_PHY_DLY_EMMC_LEGACY
   this is never enabled by the driver as it is covered by
   SDHCI_CDNS_PHY_DLY_SD_DEFAULT

Signed-off-by: Masahiro Yamada 
---

 arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi | 3 +++
 arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi 
b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
index 5dc5124..b6ebdc9 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
@@ -310,6 +310,9 @@
bus-width = <8>;
mmc-ddr-1_8v;
mmc-hs200-1_8v;
+   cdns,phy-input-delay-legacy = <4>;
+   cdns,phy-input-delay-mmc-highspeed = <2>;
+   cdns,phy-input-delay-mmc-ddr = <3>;
};
 
usb0: usb@5a800100 {
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi 
b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
index 6c9a72d..0ab6c2e 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
@@ -394,6 +394,9 @@
bus-width = <8>;
mmc-ddr-1_8v;
mmc-hs200-1_8v;
+   cdns,phy-input-delay-legacy = <4>;
+   cdns,phy-input-delay-mmc-highspeed = <2>;
+   cdns,phy-input-delay-mmc-ddr = <3>;
};
 
sd: sdhc@5a40 {
-- 
2.7.4



linux-next: manual merge of the mux tree with the arm-soc tree

2017-03-30 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the mux tree got conflicts in:

  drivers/Kconfig
  drivers/Makefile

between commit:

  967c9cca2cc5 ("tee: generic TEE subsystem")

from the arm-soc tree and commit:

  1fc1dd988186 ("mux: minimal mux subsystem and gpio-based mux controller")

from the mux tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/Kconfig
index ba2901e76769,a7ea13e1b869..
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@@ -204,6 -204,6 +204,8 @@@ source "drivers/fpga/Kconfig
  
  source "drivers/fsi/Kconfig"
  
+ source "drivers/mux/Kconfig"
+ 
 +source "drivers/tee/Kconfig"
 +
  endmenu
diff --cc drivers/Makefile
index 5db9aa6beeaf,c0436f6dd5a9..
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@@ -177,4 -177,4 +177,5 @@@ obj-$(CONFIG_ANDROID)  += android
  obj-$(CONFIG_NVMEM)   += nvmem/
  obj-$(CONFIG_FPGA)+= fpga/
  obj-$(CONFIG_FSI) += fsi/
+ obj-$(CONFIG_MULTIPLEXER) += mux/
 +obj-$(CONFIG_TEE) += tee/


linux-next: manual merge of the mux tree with the arm-soc tree

2017-03-30 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the mux tree got conflicts in:

  drivers/Kconfig
  drivers/Makefile

between commit:

  967c9cca2cc5 ("tee: generic TEE subsystem")

from the arm-soc tree and commit:

  1fc1dd988186 ("mux: minimal mux subsystem and gpio-based mux controller")

from the mux tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/Kconfig
index ba2901e76769,a7ea13e1b869..
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@@ -204,6 -204,6 +204,8 @@@ source "drivers/fpga/Kconfig
  
  source "drivers/fsi/Kconfig"
  
+ source "drivers/mux/Kconfig"
+ 
 +source "drivers/tee/Kconfig"
 +
  endmenu
diff --cc drivers/Makefile
index 5db9aa6beeaf,c0436f6dd5a9..
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@@ -177,4 -177,4 +177,5 @@@ obj-$(CONFIG_ANDROID)  += android
  obj-$(CONFIG_NVMEM)   += nvmem/
  obj-$(CONFIG_FPGA)+= fpga/
  obj-$(CONFIG_FSI) += fsi/
+ obj-$(CONFIG_MULTIPLEXER) += mux/
 +obj-$(CONFIG_TEE) += tee/


[PATCH v3] xfs: Honor FALLOC_FL_KEEP_SIZE when punching ends of files

2017-03-30 Thread Calvin Owens
When punching past EOF on XFS, fallocate(mode=PUNCH_HOLE|KEEP_SIZE) will
round the file size up to the nearest multiple of PAGE_SIZE:

  calvinow@vm-disks/generic-xfs-1 ~$ dd if=/dev/urandom of=test bs=2048 count=1
  calvinow@vm-disks/generic-xfs-1 ~$ stat test
Size: 2048Blocks: 8  IO Block: 4096   regular file
  calvinow@vm-disks/generic-xfs-1 ~$ fallocate -n -l 2048 -o 2048 -p test
  calvinow@vm-disks/generic-xfs-1 ~$ stat test
Size: 4096Blocks: 8  IO Block: 4096   regular file

Commit 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes") replaced
xfs_zero_remaining_bytes() with calls to iomap helpers. The new helpers
don't enforce that [pos,offset) lies strictly on [0,i_size) when being
called from xfs_free_file_space(), so by "leaking" these ranges into
xfs_zero_range() we get this buggy behavior.

Fix this by reintroducing the checks xfs_zero_remaining_bytes() did
against i_size at the bottom of xfs_free_file_space().

Reported-by: Aaron Gao 
Fixes: 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes")
Cc: Christoph Hellwig 
Cc: Brian Foster 
Cc:  # 4.8+
Signed-off-by: Calvin Owens 
---
 fs/xfs/xfs_bmap_util.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 8b75dce..828532c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1311,8 +1311,16 @@ xfs_free_file_space(
/*
 * Now that we've unmap all full blocks we'll have to zero out any
 * partial block at the beginning and/or end.  xfs_zero_range is
-* smart enough to skip any holes, including those we just created.
+* smart enough to skip any holes, including those we just created,
+* but we must take care not to zero beyond EOF and enlarge i_size.
 */
+
+   if (offset >= XFS_ISIZE(ip))
+   return 0;
+
+   if (offset + len > XFS_ISIZE(ip))
+   len = XFS_ISIZE(ip) - offset;
+
return xfs_zero_range(ip, offset, len, NULL);
 }
 
-- 
2.9.3



[PATCH v3] xfs: Honor FALLOC_FL_KEEP_SIZE when punching ends of files

2017-03-30 Thread Calvin Owens
When punching past EOF on XFS, fallocate(mode=PUNCH_HOLE|KEEP_SIZE) will
round the file size up to the nearest multiple of PAGE_SIZE:

  calvinow@vm-disks/generic-xfs-1 ~$ dd if=/dev/urandom of=test bs=2048 count=1
  calvinow@vm-disks/generic-xfs-1 ~$ stat test
Size: 2048Blocks: 8  IO Block: 4096   regular file
  calvinow@vm-disks/generic-xfs-1 ~$ fallocate -n -l 2048 -o 2048 -p test
  calvinow@vm-disks/generic-xfs-1 ~$ stat test
Size: 4096Blocks: 8  IO Block: 4096   regular file

Commit 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes") replaced
xfs_zero_remaining_bytes() with calls to iomap helpers. The new helpers
don't enforce that [pos,offset) lies strictly on [0,i_size) when being
called from xfs_free_file_space(), so by "leaking" these ranges into
xfs_zero_range() we get this buggy behavior.

Fix this by reintroducing the checks xfs_zero_remaining_bytes() did
against i_size at the bottom of xfs_free_file_space().

Reported-by: Aaron Gao 
Fixes: 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes")
Cc: Christoph Hellwig 
Cc: Brian Foster 
Cc:  # 4.8+
Signed-off-by: Calvin Owens 
---
 fs/xfs/xfs_bmap_util.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 8b75dce..828532c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1311,8 +1311,16 @@ xfs_free_file_space(
/*
 * Now that we've unmap all full blocks we'll have to zero out any
 * partial block at the beginning and/or end.  xfs_zero_range is
-* smart enough to skip any holes, including those we just created.
+* smart enough to skip any holes, including those we just created,
+* but we must take care not to zero beyond EOF and enlarge i_size.
 */
+
+   if (offset >= XFS_ISIZE(ip))
+   return 0;
+
+   if (offset + len > XFS_ISIZE(ip))
+   len = XFS_ISIZE(ip) - offset;
+
return xfs_zero_range(ip, offset, len, NULL);
 }
 
-- 
2.9.3



Re: [f2fs-dev] [PATCH 2/2] f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE

2017-03-30 Thread Jaegeuk Kim
On 03/31, heyunlei wrote:
> Hi Jaegeuk,
> 
> Can we split in-place-update bios into a single sbi->f2fs_bio_info so that more pages
> are merged in out-of-place updates? This case can be shown as below:
> 
> in place update submit a bio with one page
>   out place update submit a bio with one page
> in place update submit a bio with one page
>   out place update submit a bio with one page
> ... ...
> 
> just like WB_SYNC_ALL and WB_SYNC_NONE case.

Something like this?

>From d9f00695c5e56c48611ade3ced89432ef2b59a27 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim 
Date: Thu, 30 Mar 2017 21:02:46 -0700
Subject: [PATCH] f2fs: submit bio of in-place-update pages

This patch tries to split in-place-update bios from sequential bios.

Suggested-by: Yunlei He 
Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c| 2 +-
 fs/f2fs/f2fs.h| 2 +-
 fs/f2fs/segment.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b1cac6d85bcb..1392e7c153bf 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1354,7 +1354,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
!is_cold_data(page) &&
!IS_ATOMIC_WRITTEN_PAGE(page) &&
need_inplace_update(inode))) {
-   rewrite_data_page(fio);
+   err = rewrite_data_page(fio);
set_inode_flag(inode, FI_UPDATE_WRITE);
trace_f2fs_do_write_data_page(page, IPU);
} else {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fd39db681226..5a2b8cd13c92 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2207,7 +2207,7 @@ void update_meta_page(struct f2fs_sb_info *sbi, void 
*src, block_t blk_addr);
 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page);
 void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
 void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
-void rewrite_data_page(struct f2fs_io_info *fio);
+int rewrite_data_page(struct f2fs_io_info *fio);
 void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
bool recover_curseg, bool recover_newaddr);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index bff3f3bc7827..eedbed62947f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1973,11 +1973,11 @@ void write_data_page(struct dnode_of_data *dn, struct 
f2fs_io_info *fio)
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
 }
 
-void rewrite_data_page(struct f2fs_io_info *fio)
+int rewrite_data_page(struct f2fs_io_info *fio)
 {
fio->new_blkaddr = fio->old_blkaddr;
stat_inc_inplace_blocks(fio->sbi);
-   f2fs_submit_page_mbio(fio);
+   return f2fs_submit_page_bio(fio);
 }
 
 void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
-- 
2.11.0



Re: [f2fs-dev] [PATCH 2/2] f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE

2017-03-30 Thread Jaegeuk Kim
On 03/31, heyunlei wrote:
> Hi Jaegeuk,
> 
> Can we split in-place-update bios into a single sbi->f2fs_bio_info so that more pages
> are merged in out-of-place updates? This case can be shown as below:
> 
> in place update submit a bio with one page
>   out place update submit a bio with one page
> in place update submit a bio with one page
>   out place update submit a bio with one page
> ... ...
> 
> just like WB_SYNC_ALL and WB_SYNC_NONE case.

Something like this?

>From d9f00695c5e56c48611ade3ced89432ef2b59a27 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim 
Date: Thu, 30 Mar 2017 21:02:46 -0700
Subject: [PATCH] f2fs: submit bio of in-place-update pages

This patch tries to split in-place-update bios from sequential bios.

Suggested-by: Yunlei He 
Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c| 2 +-
 fs/f2fs/f2fs.h| 2 +-
 fs/f2fs/segment.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b1cac6d85bcb..1392e7c153bf 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1354,7 +1354,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
!is_cold_data(page) &&
!IS_ATOMIC_WRITTEN_PAGE(page) &&
need_inplace_update(inode))) {
-   rewrite_data_page(fio);
+   err = rewrite_data_page(fio);
set_inode_flag(inode, FI_UPDATE_WRITE);
trace_f2fs_do_write_data_page(page, IPU);
} else {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fd39db681226..5a2b8cd13c92 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2207,7 +2207,7 @@ void update_meta_page(struct f2fs_sb_info *sbi, void 
*src, block_t blk_addr);
 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page);
 void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
 void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
-void rewrite_data_page(struct f2fs_io_info *fio);
+int rewrite_data_page(struct f2fs_io_info *fio);
 void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
bool recover_curseg, bool recover_newaddr);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index bff3f3bc7827..eedbed62947f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1973,11 +1973,11 @@ void write_data_page(struct dnode_of_data *dn, struct 
f2fs_io_info *fio)
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
 }
 
-void rewrite_data_page(struct f2fs_io_info *fio)
+int rewrite_data_page(struct f2fs_io_info *fio)
 {
fio->new_blkaddr = fio->old_blkaddr;
stat_inc_inplace_blocks(fio->sbi);
-   f2fs_submit_page_mbio(fio);
+   return f2fs_submit_page_bio(fio);
 }
 
 void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
-- 
2.11.0



Re: [PATCH 11/16] fpga: intel: fme: add partial reconfiguration sub feature support

2017-03-30 Thread Xiao Guangrong



On 31/03/2017 4:30 AM, Alan Tull wrote:

On Thu, Mar 30, 2017 at 7:08 AM, Wu Hao  wrote:

From: Kang Luwei 

Partial Reconfiguration (PR) is the most important function for FME. It
allows reconfiguration for given Port/Accelerated Function Unit (AFU).

This patch adds support for PR sub feature. In this patch, it registers
a fpga_mgr and implements fpga_manager_ops, and invoke fpga_mgr_buf_load
for PR operation once PR request received via ioctl. Below user space
interfaces are exposed by this sub feature.

Sysfs interface:
* /sys/class/fpga///interface_id
  Read-only. Indicate the hardware interface information. Userspace
  applications need to check this interface to select correct green
  bitstream format before PR.

Ioctl interface:
* FPGA_FME_PORT_PR
  Do partial reconfiguration per information from userspace, including
  target port(AFU), buffer size and address info. It returns the PR status
  (PR error code if failed) to userspace.

Signed-off-by: Tim Whisonant 
Signed-off-by: Enno Luebbers 
Signed-off-by: Shiva Rao 
Signed-off-by: Christopher Rauer 
Signed-off-by: Alan Tull 


Hi Wu Hao,

Thanks for submitting your patches.

I think there's been a misunderstanding of the meaning of
'Signed-off-by' [1].  I have not signed off on this code or had a hand
in its development.  But I'm happy to get to review it now.  It will
take a bit of time; I expect to be replying next week.


Hi Alan,

Sorry for the confusion. I think it's because you helped Chris a lot in
implementing this interface, and we wanted to credit you this
way. If you would rather not be listed, it will be dropped. :)

Thanks for your review in advance.




Re: [PATCH 11/16] fpga: intel: fme: add partial reconfiguration sub feature support

2017-03-30 Thread Xiao Guangrong



On 31/03/2017 4:30 AM, Alan Tull wrote:

On Thu, Mar 30, 2017 at 7:08 AM, Wu Hao  wrote:

From: Kang Luwei 

Partial Reconfiguration (PR) is the most important function for FME. It
allows reconfiguration for given Port/Accelerated Function Unit (AFU).

This patch adds support for PR sub feature. In this patch, it registers
a fpga_mgr and implements fpga_manager_ops, and invoke fpga_mgr_buf_load
for PR operation once PR request received via ioctl. Below user space
interfaces are exposed by this sub feature.

Sysfs interface:
* /sys/class/fpga///interface_id
  Read-only. Indicate the hardware interface information. Userspace
  applications need to check this interface to select correct green
  bitstream format before PR.

Ioctl interface:
* FPGA_FME_PORT_PR
  Do partial reconfiguration per information from userspace, including
  target port(AFU), buffer size and address info. It returns the PR status
  (PR error code if failed) to userspace.

Signed-off-by: Tim Whisonant 
Signed-off-by: Enno Luebbers 
Signed-off-by: Shiva Rao 
Signed-off-by: Christopher Rauer 
Signed-off-by: Alan Tull 


Hi Wu Hao,

Thanks for submitting your patches.

I think there's been a misunderstanding of the meaning of
'Signed-off-by' [1].  I have not signed off on this code or had a hand
in its development.  But I'm happy to get to review it now.  It will
take a bit of time; I expect to be replying next week.


Hi Alan,

Sorry for the confusion. I think it's because you helped Chris a lot in
implementing this interface, and we wanted to credit you this
way. If you would rather not be listed, it will be dropped. :)

Thanks for your review in advance.




Re: [PATCH V2 net-next 5/7] tun: support receiving skb through msg_control

2017-03-30 Thread Jason Wang



On 2017年03月30日 23:06, Michael S. Tsirkin wrote:

On Thu, Mar 30, 2017 at 03:22:28PM +0800, Jason Wang wrote:

This patch makes tun_recvmsg() able to receive an skb from its caller
through msg_control. Vhost_net will be the first user.

Signed-off-by: Jason Wang

Do we need to bother with tun? I didn't realize one
can even use that with vhost. What would be the point of
all the virtio header stuff dealing with checksums etc?

Even if you see a use-case is it worth optimizing?




It's for tap, in fact. I use "tun" just because we already have a
tap.c, which is used by macvtap.


Thanks


Re: [PATCH V2 net-next 5/7] tun: support receiving skb through msg_control

2017-03-30 Thread Jason Wang



On 2017年03月30日 23:06, Michael S. Tsirkin wrote:

On Thu, Mar 30, 2017 at 03:22:28PM +0800, Jason Wang wrote:

This patch makes tun_recvmsg() able to receive an skb from its caller
through msg_control. Vhost_net will be the first user.

Signed-off-by: Jason Wang

Do we need to bother with tun? I didn't realize one
can even use that with vhost. What would be the point of
all the virtio header stuff dealing with checksums etc?

Even if you see a use-case is it worth optimizing?




It's for tap, in fact. I use "tun" just because we already have a
tap.c, which is used by macvtap.


Thanks


Re: [PATCH V2 net-next 6/7] tap: support receiving skb from msg_control

2017-03-30 Thread Jason Wang



On 2017年03月30日 23:03, Michael S. Tsirkin wrote:

On Thu, Mar 30, 2017 at 03:22:29PM +0800, Jason Wang wrote:

This patch makes tap_recvmsg() able to receive an skb from its caller
through msg_control. Vhost_net will be the first user.

Signed-off-by: Jason Wang
---
  drivers/net/tap.c | 12 
  1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index abdaf86..07d9174 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -824,15 +824,17 @@ static ssize_t tap_put_user(struct tap_queue *q,
  
  static ssize_t tap_do_read(struct tap_queue *q,

   struct iov_iter *to,
-  int noblock)
+  int noblock, struct sk_buff *skb)
  {
DEFINE_WAIT(wait);
-   struct sk_buff *skb;
ssize_t ret = 0;
  
  	if (!iov_iter_count(to))

return 0;
  
+	if (skb)

+   goto done;
+
while (1) {
if (!noblock)
prepare_to_wait(sk_sleep(&q->sk), &wait,
@@ -856,6 +858,7 @@ static ssize_t tap_do_read(struct tap_queue *q,
if (!noblock)
finish_wait(sk_sleep(&q->sk), &wait);
  
+done:

Please just use an if {} block here. goto on error is ok,
but we are far from done here and goto done is misleading.




Ok.

Thanks.


Re: [PATCH V2 net-next 6/7] tap: support receiving skb from msg_control

2017-03-30 Thread Jason Wang



On 2017年03月30日 23:03, Michael S. Tsirkin wrote:

On Thu, Mar 30, 2017 at 03:22:29PM +0800, Jason Wang wrote:

This patch makes tap_recvmsg() able to receive an skb from its caller
through msg_control. Vhost_net will be the first user.

Signed-off-by: Jason Wang
---
  drivers/net/tap.c | 12 
  1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index abdaf86..07d9174 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -824,15 +824,17 @@ static ssize_t tap_put_user(struct tap_queue *q,
  
  static ssize_t tap_do_read(struct tap_queue *q,

   struct iov_iter *to,
-  int noblock)
+  int noblock, struct sk_buff *skb)
  {
DEFINE_WAIT(wait);
-   struct sk_buff *skb;
ssize_t ret = 0;
  
  	if (!iov_iter_count(to))

return 0;
  
+	if (skb)

+   goto done;
+
while (1) {
if (!noblock)
prepare_to_wait(sk_sleep(&q->sk), &wait,
@@ -856,6 +858,7 @@ static ssize_t tap_do_read(struct tap_queue *q,
if (!noblock)
finish_wait(sk_sleep(>sk), );
  
+done:

Please just use an if {} block here. goto on error is ok,
but we are far from done here and goto done is misleading.




Ok.

Thanks.


Re: [PATCH] zram: factor out partial IO routine

2017-03-30 Thread Sergey Senozhatsky
Hello,

On (03/31/17 08:33), Minchan Kim wrote:
[..]
> > a bigger side effect is that now we double the amount of lines we need
> > to change in certain patches and, thus, the amount of work - when we
> > add new functionality/fix something in zram_bvec_{write, read} we also
> > would need to touch zram_bvec_partial_{write, read}.
> 
> Yes, that is a pain, too. However, I thought it would be more easier
> because as-is partial IO routine is more error-prone to me. :)

yeah, it is quite tricky and a bit difficult. agree.

> > still probably worth it.
> > 
> > Reviewed-by: Sergey Senozhatsky 
>  
> Thanks for the review.
> so I tried clean-up further to make you happy. :)
> 
> How about this?
> It's totally untested and I have no time until Monday next week.
> So, please review with having enough time.

will take a look :)
thanks!

-ss


Re: [PATCH] zram: factor out partial IO routine

2017-03-30 Thread Sergey Senozhatsky
Hello,

On (03/31/17 08:33), Minchan Kim wrote:
[..]
> > a bigger side effect is that now we double the amount of lines we need
> > to change in certain patches and, thus, the amount of work - when we
> > add new functionality/fix something in zram_bvec_{write, read} we also
> > would need to touch zram_bvec_partial_{write, read}.
> 
> Yes, that is a pain, too. However, I thought it would be more easier
> because as-is partial IO routine is more error-prone to me. :)

yeah, it is quite tricky and a bit difficult. agree.

> > still probably worth it.
> > 
> > Reviewed-by: Sergey Senozhatsky 
>  
> Thanks for the review.
> so I tried clean-up further to make you happy. :)
> 
> How about this?
> It's totally untested and I have no time until Monday next week.
> So, please review with having enough time.

will take a look :)
thanks!

-ss


Re: [PATCH v2] sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable

2017-03-30 Thread Fam Zheng
On Thu, 03/30 11:30, Martin K. Petersen wrote:
> Fam Zheng  writes:
> 
> >>rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
> >>   BLK_DEF_MAX_SECTORS);
> >
> > Yes, it is better. Is it okay to make the change when you apply?
> 
> Sure. Applied to 4.11/scsi-fixes.

Cool. Thanks!

Fam


Re: [PATCH v2] sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable

2017-03-30 Thread Fam Zheng
On Thu, 03/30 11:30, Martin K. Petersen wrote:
> Fam Zheng  writes:
> 
> >>rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
> >>   BLK_DEF_MAX_SECTORS);
> >
> > Yes, it is better. Is it okay to make the change when you apply?
> 
> Sure. Applied to 4.11/scsi-fixes.

Cool. Thanks!

Fam


Re: [PATCH v3 00/37] mtd: nand: denali: 2nd round of Denali NAND IP patch bomb

2017-03-30 Thread Masahiro Yamada
Hi Boris,


2017-03-31 1:38 GMT+09:00 Boris Brezillon :

> The rest looks almost good, except for a few comments I had on patch
> 14, 18, 25, 26 and 32.
>
> I'll probably apply 33 and 34 soon.
>

Thank you!
Please note I left a minor comment on 34.
(Accidental addition of braces.)


-- 
Best Regards
Masahiro Yamada


Re: [PATCH v3 00/37] mtd: nand: denali: 2nd round of Denali NAND IP patch bomb

2017-03-30 Thread Masahiro Yamada
Hi Boris,


2017-03-31 1:38 GMT+09:00 Boris Brezillon :

> The rest looks almost good, except for a few comments I had on patch
> 14, 18, 25, 26 and 32.
>
> I'll probably apply 33 and 34 soon.
>

Thank you!
Please note I left a minor comment on 34.
(Accidental addition of braces.)


-- 
Best Regards
Masahiro Yamada


Re: [PATCH 1/6] virtio: wrap find_vqs

2017-03-30 Thread Jason Wang



On 2017年03月30日 22:32, Michael S. Tsirkin wrote:

On Thu, Mar 30, 2017 at 02:00:08PM +0800, Jason Wang wrote:


On 2017年03月30日 04:48, Michael S. Tsirkin wrote:

We are going to add more parameters to find_vqs, let's wrap the call so
we don't need to tweak all drivers every time.

Signed-off-by: Michael S. Tsirkin
---

A quick glance and it looks ok, but what is the benefit of this series? Is it
required by other changes?

Thanks

Yes - to avoid touching all devices when doing the rest of
the patchset.


Maybe I wasn't clear. I mean the benefit of this series, not of this single
patch. I guess it may be used by your proposal that avoids a reset when setting
XDP? If yes, do we really want to drop some packets after XDP is set?


Thanks


Re: [PATCH 1/6] virtio: wrap find_vqs

2017-03-30 Thread Jason Wang



On 2017年03月30日 22:32, Michael S. Tsirkin wrote:

On Thu, Mar 30, 2017 at 02:00:08PM +0800, Jason Wang wrote:


On 2017年03月30日 04:48, Michael S. Tsirkin wrote:

We are going to add more parameters to find_vqs, let's wrap the call so
we don't need to tweak all drivers every time.

Signed-off-by: Michael S. Tsirkin
---

A quick glance and it looks ok, but what is the benefit of this series? Is it
required by other changes?

Thanks

Yes - to avoid touching all devices when doing the rest of
the patchset.


Maybe I wasn't clear. I mean the benefit of this series, not of this single
patch. I guess it may be used by your proposal that avoids a reset when setting
XDP? If yes, do we really want to drop some packets after XDP is set?


Thanks


Re: [printk] fbc14616f4: BUG:kernel_reboot-without-warning_in_test_stage

2017-03-30 Thread Sergey Senozhatsky
On (03/31/17 11:35), Sergey Senozhatsky wrote:
[..]
> > [   21.009531] VFS: Warning: trinity-c2 using old stat() call. Recompile 
> > your binary.
> > [   21.148898] VFS: Warning: trinity-c0 using old stat() call. Recompile 
> > your binary.
> > [   22.298208] warning: process `trinity-c2' used the deprecated sysctl 
> > system call with 
> > 
> > Elapsed time: 310
> > BUG: kernel reboot-without-warning in test stage
> 
> so as far as I understand, this is the "missing kernel messages"
> type of bug report. a worst case scenario.

panic() should have called console_flush_on_panic(), which should have
flushed the messages regardless of the printk_kthread state. so it probably
was not panic() that rebooted the kernel. (probably).

kernel_restart() and kernel_halt() have pr_emerg() messages, printk switches
to printk_emergency mode the first time it sees an EMERG level message. (maybe
we switch too late).

on the other hand, there is an emergency_restart(), where we don't switch
to printk_emergency mode and don't flush the existing kernel messages.
there is a bunch of places that call emergency_restart(), including sysrq.

may I ask you, how do you usually restart the vm after the test?
`echo X > /proc/sysrq-trigger'?

does this patch make it any better?

---
 drivers/tty/sysrq.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 817dfb69914d..069f5540be36 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -240,7 +240,6 @@ static DECLARE_WORK(sysrq_showallcpus, 
sysrq_showregs_othercpus);
 
 static void sysrq_handle_showallcpus(int key)
 {
-   printk_emergency_begin();
/*
 * Fall back to the workqueue based printing if the
 * backtrace printing did not succeed or the
@@ -255,7 +254,6 @@ static void sysrq_handle_showallcpus(int key)
}
schedule_work(&sysrq_showallcpus);
}
-   printk_emergency_end();
 }
 
 static struct sysrq_key_op sysrq_showallcpus_op = {
@@ -282,10 +280,8 @@ static struct sysrq_key_op sysrq_showregs_op = {
 
 static void sysrq_handle_showstate(int key)
 {
-   printk_emergency_begin();
show_state();
show_workqueue_state();
-   printk_emergency_end();
 }
 static struct sysrq_key_op sysrq_showstate_op = {
.handler= sysrq_handle_showstate,
@@ -296,9 +292,7 @@ static struct sysrq_key_op sysrq_showstate_op = {
 
 static void sysrq_handle_showstate_blocked(int key)
 {
-   printk_emergency_begin();
show_state_filter(TASK_UNINTERRUPTIBLE);
-   printk_emergency_end();
 }
 static struct sysrq_key_op sysrq_showstate_blocked_op = {
.handler= sysrq_handle_showstate_blocked,
@@ -537,6 +531,7 @@ void __handle_sysrq(int key, bool check_mask)
int orig_log_level;
int i;
 
+   printk_emergency_begin();
rcu_sysrq_start();
rcu_read_lock();
/*
@@ -582,6 +577,7 @@ void __handle_sysrq(int key, bool check_mask)
}
rcu_read_unlock();
rcu_sysrq_end();
+   printk_emergency_end();
 }
 
 void handle_sysrq(int key)
-- 
2.12.2



Re: [printk] fbc14616f4: BUG:kernel_reboot-without-warning_in_test_stage

2017-03-30 Thread Sergey Senozhatsky
On (03/31/17 11:35), Sergey Senozhatsky wrote:
[..]
> > [   21.009531] VFS: Warning: trinity-c2 using old stat() call. Recompile 
> > your binary.
> > [   21.148898] VFS: Warning: trinity-c0 using old stat() call. Recompile 
> > your binary.
> > [   22.298208] warning: process `trinity-c2' used the deprecated sysctl 
> > system call with 
> > 
> > Elapsed time: 310
> > BUG: kernel reboot-without-warning in test stage
> 
> so as far as I understand, this is the "missing kernel messages"
> type of bug report. a worst case scenario.

panic() should have called console_flush_on_panic(), which should have
flushed the messages regardless of the printk_kthread state. so it probably
was not panic() that rebooted the kernel. (probably).

kernel_restart() and kernel_halt() have pr_emerg() messages, printk switches
to printk_emergency mode the first time it sees an EMERG level message. (maybe
we switch too late).

on the other hand, there is an emergency_restart(), where we don't switch
to printk_emergency mode and don't flush the existing kernel messages.
there is a bunch of places that call emergency_restart(), including sysrq.

may I ask you, how do you usually restart the vm after the test?
`echo X > /proc/sysrq-trigger'?

does this patch make it any better?

---
 drivers/tty/sysrq.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 817dfb69914d..069f5540be36 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -240,7 +240,6 @@ static DECLARE_WORK(sysrq_showallcpus, 
sysrq_showregs_othercpus);
 
 static void sysrq_handle_showallcpus(int key)
 {
-   printk_emergency_begin();
/*
 * Fall back to the workqueue based printing if the
 * backtrace printing did not succeed or the
@@ -255,7 +254,6 @@ static void sysrq_handle_showallcpus(int key)
}
schedule_work(&sysrq_showallcpus);
}
-   printk_emergency_end();
 }
 
 static struct sysrq_key_op sysrq_showallcpus_op = {
@@ -282,10 +280,8 @@ static struct sysrq_key_op sysrq_showregs_op = {
 
 static void sysrq_handle_showstate(int key)
 {
-   printk_emergency_begin();
show_state();
show_workqueue_state();
-   printk_emergency_end();
 }
 static struct sysrq_key_op sysrq_showstate_op = {
.handler= sysrq_handle_showstate,
@@ -296,9 +292,7 @@ static struct sysrq_key_op sysrq_showstate_op = {
 
 static void sysrq_handle_showstate_blocked(int key)
 {
-   printk_emergency_begin();
show_state_filter(TASK_UNINTERRUPTIBLE);
-   printk_emergency_end();
 }
 static struct sysrq_key_op sysrq_showstate_blocked_op = {
.handler= sysrq_handle_showstate_blocked,
@@ -537,6 +531,7 @@ void __handle_sysrq(int key, bool check_mask)
int orig_log_level;
int i;
 
+   printk_emergency_begin();
rcu_sysrq_start();
rcu_read_lock();
/*
@@ -582,6 +577,7 @@ void __handle_sysrq(int key, bool check_mask)
}
rcu_read_unlock();
rcu_sysrq_end();
+   printk_emergency_end();
 }
 
 void handle_sysrq(int key)
-- 
2.12.2



Re: [PATCH V2 net-next 7/7] vhost_net: try batch dequing from skb array

2017-03-30 Thread Jason Wang



On 2017年03月30日 22:21, Michael S. Tsirkin wrote:

On Thu, Mar 30, 2017 at 03:22:30PM +0800, Jason Wang wrote:

We used to dequeue one skb during recvmsg() from skb_array, this could
be inefficient because of the bad cache utilization

which cache does this refer to btw?


Both icache and dcache more or less.




and spinlock
touching for each packet.

Do you mean the effect of extra two atomics here?


In fact four, packet length peeking needs another two.




This patch tries to batch them by calling
batch dequeuing helpers explicitly on the exported skb array and pass
the skb back through msg_control for underlayer socket to finish the
userspace copying.

Tests were done by XDP1:
- small buffer:
   Before: 1.88Mpps
   After : 2.25Mpps (+19.6%)
- mergeable buffer:
   Before: 1.83Mpps
   After : 2.10Mpps (+14.7%)

Signed-off-by: Jason Wang 

Looks like I misread the code previously. More comments below,
sorry about not asking these questions earlier.


---
  drivers/vhost/net.c | 64 +
  1 file changed, 60 insertions(+), 4 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9b51989..ffa78c6 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -28,6 +28,8 @@
  #include 
  #include 
  #include 
+#include 
+#include 
  
  #include 
  
@@ -85,6 +87,7 @@ struct vhost_net_ubuf_ref {

struct vhost_virtqueue *vq;
  };
  
+#define VHOST_RX_BATCH 64

  struct vhost_net_virtqueue {
struct vhost_virtqueue vq;
size_t vhost_hlen;

Could you please try playing with batch size and see
what the effect is?


Ok. I tried 32 which seems slower than 64 but still faster than no batching.




@@ -99,6 +102,10 @@ struct vhost_net_virtqueue {
/* Reference counting for outstanding ubufs.
 * Protected by vq mutex. Writers must also take device mutex. */
struct vhost_net_ubuf_ref *ubufs;
+   struct skb_array *rx_array;
+   void *rxq[VHOST_RX_BATCH];
+   int rt;
+   int rh;
  };
  
  struct vhost_net {

@@ -201,6 +208,8 @@ static void vhost_net_vq_reset(struct vhost_net *n)
n->vqs[i].ubufs = NULL;
n->vqs[i].vhost_hlen = 0;
n->vqs[i].sock_hlen = 0;
+   n->vqs[i].rt = 0;
+   n->vqs[i].rh = 0;
}
  
  }

@@ -503,13 +512,30 @@ static void handle_tx(struct vhost_net *net)
mutex_unlock(&vq->mutex);
  }
  
-static int peek_head_len(struct sock *sk)

+static int fetch_skbs(struct vhost_net_virtqueue *rvq)
+{
+   if (rvq->rh != rvq->rt)
+   goto out;
+
+   rvq->rh = rvq->rt = 0;
+   rvq->rt = skb_array_consume_batched(rvq->rx_array, rvq->rxq,
+   VHOST_RX_BATCH);
+   if (!rvq->rt)
+   return 0;
+out:
+   return __skb_array_len_with_tag(rvq->rxq[rvq->rh]);
+}
+
+static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
  {
struct socket *sock = sk->sk_socket;
struct sk_buff *head;
int len = 0;
unsigned long flags;
  
+	if (rvq->rx_array)

+   return fetch_skbs(rvq);
+
if (sock->ops->peek_len)
return sock->ops->peek_len(sock);
  
@@ -535,12 +561,14 @@ static int sk_has_rx_data(struct sock *sk)

return skb_queue_empty(&sk->sk_receive_queue);
  }
  
-static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)

+static int vhost_net_rx_peek_head_len(struct vhost_net *net,
+ struct sock *sk)
  {
+   struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
unsigned long uninitialized_var(endtime);
-   int len = peek_head_len(sk);
+   int len = peek_head_len(rvq, sk);
  
  	if (!len && vq->busyloop_timeout) {

/* Both tx vq and rx socket were polled here */
@@ -561,7 +589,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net 
*net, struct sock *sk)
vhost_poll_queue(&vq->poll);
mutex_unlock(&vq->mutex);
  
-		len = peek_head_len(sk);

+   len = peek_head_len(rvq, sk);
}
  
  	return len;

@@ -699,6 +727,8 @@ static void handle_rx(struct vhost_net *net)
/* On error, stop handling until the next kick. */
if (unlikely(headcount < 0))
goto out;
+   if (nvq->rx_array)
+   msg.msg_control = nvq->rxq[nvq->rh++];
/* On overrun, truncate and discard */
if (unlikely(headcount > UIO_MAXIOV)) {
iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);

So there's a bit of a mystery here. vhost code isn't
batched, all we are batching is the fetch from the tun ring.


I've already had vhost batching code on top (e.g descriptor indices 
prefetching and used ring batched 

Re: [PATCH V2 net-next 7/7] vhost_net: try batch dequing from skb array

2017-03-30 Thread Jason Wang



On 2017年03月30日 22:21, Michael S. Tsirkin wrote:

On Thu, Mar 30, 2017 at 03:22:30PM +0800, Jason Wang wrote:

We used to dequeue one skb during recvmsg() from skb_array, this could
be inefficient because of the bad cache utilization

which cache does this refer to btw?


Both icache and dcache more or less.




and spinlock
touching for each packet.

Do you mean the effect of extra two atomics here?


In fact four, packet length peeking needs another two.




This patch tries to batch them by calling
batch dequeuing helpers explicitly on the exported skb array and pass
the skb back through msg_control for underlayer socket to finish the
userspace copying.

Tests were done by XDP1:
- small buffer:
   Before: 1.88Mpps
   After : 2.25Mpps (+19.6%)
- mergeable buffer:
   Before: 1.83Mpps
   After : 2.10Mpps (+14.7%)

Signed-off-by: Jason Wang 

Looks like I misread the code previously. More comments below,
sorry about not asking these questions earlier.


---
  drivers/vhost/net.c | 64 +
  1 file changed, 60 insertions(+), 4 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9b51989..ffa78c6 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -28,6 +28,8 @@
  #include 
  #include 
  #include 
+#include 
+#include 
  
  #include 
  
@@ -85,6 +87,7 @@ struct vhost_net_ubuf_ref {

struct vhost_virtqueue *vq;
  };
  
+#define VHOST_RX_BATCH 64

  struct vhost_net_virtqueue {
struct vhost_virtqueue vq;
size_t vhost_hlen;

Could you please try playing with batch size and see
what the effect is?


Ok. I tried 32 which seems slower than 64 but still faster than no batching.




@@ -99,6 +102,10 @@ struct vhost_net_virtqueue {
/* Reference counting for outstanding ubufs.
 * Protected by vq mutex. Writers must also take device mutex. */
struct vhost_net_ubuf_ref *ubufs;
+   struct skb_array *rx_array;
+   void *rxq[VHOST_RX_BATCH];
+   int rt;
+   int rh;
  };
  
  struct vhost_net {

@@ -201,6 +208,8 @@ static void vhost_net_vq_reset(struct vhost_net *n)
n->vqs[i].ubufs = NULL;
n->vqs[i].vhost_hlen = 0;
n->vqs[i].sock_hlen = 0;
+   n->vqs[i].rt = 0;
+   n->vqs[i].rh = 0;
}
  
  }

@@ -503,13 +512,30 @@ static void handle_tx(struct vhost_net *net)
mutex_unlock(&vq->mutex);
  }
  
-static int peek_head_len(struct sock *sk)

+static int fetch_skbs(struct vhost_net_virtqueue *rvq)
+{
+   if (rvq->rh != rvq->rt)
+   goto out;
+
+   rvq->rh = rvq->rt = 0;
+   rvq->rt = skb_array_consume_batched(rvq->rx_array, rvq->rxq,
+   VHOST_RX_BATCH);
+   if (!rvq->rt)
+   return 0;
+out:
+   return __skb_array_len_with_tag(rvq->rxq[rvq->rh]);
+}
+
+static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
  {
struct socket *sock = sk->sk_socket;
struct sk_buff *head;
int len = 0;
unsigned long flags;
  
+	if (rvq->rx_array)

+   return fetch_skbs(rvq);
+
if (sock->ops->peek_len)
return sock->ops->peek_len(sock);
  
@@ -535,12 +561,14 @@ static int sk_has_rx_data(struct sock *sk)

return skb_queue_empty(&sk->sk_receive_queue);
  }
  
-static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)

+static int vhost_net_rx_peek_head_len(struct vhost_net *net,
+ struct sock *sk)
  {
+   struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
unsigned long uninitialized_var(endtime);
-   int len = peek_head_len(sk);
+   int len = peek_head_len(rvq, sk);
  
  	if (!len && vq->busyloop_timeout) {

/* Both tx vq and rx socket were polled here */
@@ -561,7 +589,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net 
*net, struct sock *sk)
vhost_poll_queue(&vq->poll);
mutex_unlock(&vq->mutex);
  
-		len = peek_head_len(sk);

+   len = peek_head_len(rvq, sk);
}
  
  	return len;

@@ -699,6 +727,8 @@ static void handle_rx(struct vhost_net *net)
/* On error, stop handling until the next kick. */
if (unlikely(headcount < 0))
goto out;
+   if (nvq->rx_array)
+   msg.msg_control = nvq->rxq[nvq->rh++];
/* On overrun, truncate and discard */
if (unlikely(headcount > UIO_MAXIOV)) {
iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);

So there's a bit of a mystery here. vhost code isn't
batched, all we are batching is the fetch from the tun ring.


I've already had vhost batching code on top (e.g descriptor indices 
prefetching and used ring batched updating like dpdk). 

Re: [PATCH v3 34/37] mtd: nand: allow drivers to request minimum alignment for passed buffer

2017-03-30 Thread Masahiro Yamada
Hi Boris,


2017-03-30 17:15 GMT+09:00 Masahiro Yamada :
> In some cases, nand_do_{read,write}_ops is passed with unaligned
> ops->datbuf.  Drivers using DMA will be unhappy about unaligned
> buffer.
>
> The new struct member, buf_align, represents the minimum alignment
> the driver require for the buffer.  If the buffer passed from the
> upper MTD layer does not have enough alignment, nand_do_*_ops will
> use bufpoi.
>
> Signed-off-by: Masahiro Yamada 
> ---
>
> I was hit by this problem when I ran
>   # mount -t jffs2 /dev/mtdblock*  /mnt
>
> The buffer passed to nand_do_*_ops has 4 byte offset.
> The Denali IP cannot do DMA to/from this buffer because it
> requires 16 byte alignment for DMA.
>
>
> Changes in v3:
>   - Newly added
>
> Changes in v2: None
>
>  drivers/mtd/nand/nand_base.c | 12 
>  include/linux/mtd/nand.h |  2 ++
>  2 files changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
> index e9d3195..b528ffa 100644
> --- a/drivers/mtd/nand/nand_base.c
> +++ b/drivers/mtd/nand/nand_base.c
> @@ -1953,9 +1953,10 @@ static int nand_do_read_ops(struct mtd_info *mtd, 
> loff_t from,
>
> if (!aligned)
> use_bufpoi = 1;
> -   else if (chip->options & NAND_USE_BOUNCE_BUFFER)
> -   use_bufpoi = !virt_addr_valid(buf);
> -   else
> +   else if (chip->options & NAND_USE_BOUNCE_BUFFER) {
> +   use_bufpoi = !virt_addr_valid(buf) ||
> +  !IS_ALIGNED((unsigned long)buf, 
> chip->buf_align);
> +   } else
> use_bufpoi = 0;


I noticed I added unneeded braces here by mistake.
(When I was testing this part, I inserted printk here,
then I forgot to remove the {} . )

Can you fix it up?

If requested, I can re-send it.





-- 
Best Regards
Masahiro Yamada


Re: [PATCH v3 34/37] mtd: nand: allow drivers to request minimum alignment for passed buffer

2017-03-30 Thread Masahiro Yamada
Hi Boris,


2017-03-30 17:15 GMT+09:00 Masahiro Yamada :
> In some cases, nand_do_{read,write}_ops is passed with unaligned
> ops->datbuf.  Drivers using DMA will be unhappy about unaligned
> buffer.
>
> The new struct member, buf_align, represents the minimum alignment
> the driver require for the buffer.  If the buffer passed from the
> upper MTD layer does not have enough alignment, nand_do_*_ops will
> use bufpoi.
>
> Signed-off-by: Masahiro Yamada 
> ---
>
> I was hit by this problem when I ran
>   # mount -t jffs2 /dev/mtdblock*  /mnt
>
> The buffer passed to nand_do_*_ops has 4 byte offset.
> The Denali IP cannot do DMA to/from this buffer because it
> requires 16 byte alignment for DMA.
>
>
> Changes in v3:
>   - Newly added
>
> Changes in v2: None
>
>  drivers/mtd/nand/nand_base.c | 12 
>  include/linux/mtd/nand.h |  2 ++
>  2 files changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
> index e9d3195..b528ffa 100644
> --- a/drivers/mtd/nand/nand_base.c
> +++ b/drivers/mtd/nand/nand_base.c
> @@ -1953,9 +1953,10 @@ static int nand_do_read_ops(struct mtd_info *mtd, 
> loff_t from,
>
> if (!aligned)
> use_bufpoi = 1;
> -   else if (chip->options & NAND_USE_BOUNCE_BUFFER)
> -   use_bufpoi = !virt_addr_valid(buf);
> -   else
> +   else if (chip->options & NAND_USE_BOUNCE_BUFFER) {
> +   use_bufpoi = !virt_addr_valid(buf) ||
> +  !IS_ALIGNED((unsigned long)buf, 
> chip->buf_align);
> +   } else
> use_bufpoi = 0;


I noticed I added unneeded braces here by mistake.
(When I was testing this part, I inserted printk here,
then I forgot to remove the {} . )

Can you fix it up?

If requested, I can re-send it.





-- 
Best Regards
Masahiro Yamada


Re: [PATCH 2/7] iommu/iova: cut down judgement times

2017-03-30 Thread Leizhen (ThunderTown)


On 2017/3/23 20:11, Robin Murphy wrote:
> On 22/03/17 06:27, Zhen Lei wrote:
>> Below judgement can only be satisfied at the last time, which produced 2N
>> judgements(suppose N times failed, 0 or 1 time successed) in vain.
>>
>> if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
>>  return iova;
>> }
> 
> For me, GCC (6.2.1 AArch64) seems to do a pretty good job of this
> function already, so this change only saves two instructions in total
> (pfn is compared against pfn_lo only once instead of twice), which I
> wouldn't expect to see a noticeable performance effect from.
OK, thanks for your careful analysis.

Although only two instructions are saved in each loop iteration, it's still an
improvement and does no harm.

> 
> Given the improvement in readability, though, I don't even care about
> any codegen differences :)
> 
> Reviewed-by: Robin Murphy 
> 
>> Signed-off-by: Zhen Lei 
>> ---
>>  drivers/iommu/iova.c | 9 +++--
>>  1 file changed, 3 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
>> index 8ba8b496..1c49969 100644
>> --- a/drivers/iommu/iova.c
>> +++ b/drivers/iommu/iova.c
>> @@ -312,15 +312,12 @@ private_find_iova(struct iova_domain *iovad, unsigned 
>> long pfn)
>>  while (node) {
>>  struct iova *iova = rb_entry(node, struct iova, node);
>>  
>> -/* If pfn falls within iova's range, return iova */
>> -if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
>> -return iova;
>> -}
>> -
>>  if (pfn < iova->pfn_lo)
>>  node = node->rb_left;
>> -else if (pfn > iova->pfn_lo)
>> +else if (pfn > iova->pfn_hi)
>>  node = node->rb_right;
>> +else
>> +return iova;/* pfn falls within iova's range */
>>  }
>>  
>>  return NULL;
>>
> 
> 
> .
> 

-- 
Thanks!
BestRegards



Re: [PATCH 2/7] iommu/iova: cut down judgement times

2017-03-30 Thread Leizhen (ThunderTown)


On 2017/3/23 20:11, Robin Murphy wrote:
> On 22/03/17 06:27, Zhen Lei wrote:
>> Below judgement can only be satisfied at the last time, which produced 2N
>> judgements(suppose N times failed, 0 or 1 time successed) in vain.
>>
>> if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
>>  return iova;
>> }
> 
> For me, GCC (6.2.1 AArch64) seems to do a pretty good job of this
> function already, so this change only saves two instructions in total
> (pfn is compared against pfn_lo only once instead of twice), which I
> wouldn't expect to see a noticeable performance effect from.
OK, thanks for your careful analysis.

Although only two instructions are saved in each loop iteration, it's still an
improvement and does no harm.

> 
> Given the improvement in readability, though, I don't even care about
> any codegen differences :)
> 
> Reviewed-by: Robin Murphy 
> 
>> Signed-off-by: Zhen Lei 
>> ---
>>  drivers/iommu/iova.c | 9 +++--
>>  1 file changed, 3 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
>> index 8ba8b496..1c49969 100644
>> --- a/drivers/iommu/iova.c
>> +++ b/drivers/iommu/iova.c
>> @@ -312,15 +312,12 @@ private_find_iova(struct iova_domain *iovad, unsigned 
>> long pfn)
>>  while (node) {
>>  struct iova *iova = rb_entry(node, struct iova, node);
>>  
>> -/* If pfn falls within iova's range, return iova */
>> -if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
>> -return iova;
>> -}
>> -
>>  if (pfn < iova->pfn_lo)
>>  node = node->rb_left;
>> -else if (pfn > iova->pfn_lo)
>> +else if (pfn > iova->pfn_hi)
>>  node = node->rb_right;
>> +else
>> +return iova;/* pfn falls within iova's range */
>>  }
>>  
>>  return NULL;
>>
> 
> 
> .
> 

-- 
Thanks!
BestRegards



Re: [f2fs-dev] [PATCH 1/2] f2fs: write small sized IO to hot log

2017-03-30 Thread Jaegeuk Kim
On 03/31, heyunlei wrote:
> Hi Jaegeuk,
> 
> On 2017/3/30 4:48, Jaegeuk Kim wrote:
> > It would better split small and large IOs separately in order to get more
> > consecutive big writes.
> > 
> > The default threshold is set to 64KB, but configurable by 
> > sysfs/min_hot_blocks.
> > 
> > Signed-off-by: Jaegeuk Kim 
> > ---
> >  fs/f2fs/data.c|  9 +
> >  fs/f2fs/f2fs.h|  2 ++
> >  fs/f2fs/segment.c | 13 ++---
> >  fs/f2fs/segment.h |  1 +
> >  fs/f2fs/super.c   |  2 ++
> >  5 files changed, 20 insertions(+), 7 deletions(-)
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 090413236b27..8f36080b47c4 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1432,6 +1432,8 @@ static int __write_data_page(struct page *page, bool 
> > *submitted,
> > need_balance_fs = true;
> > else if (has_not_enough_free_secs(sbi, 0, 0))
> > goto redirty_out;
> > +   else
> > +   set_inode_flag(inode, FI_HOT_DATA);
> 
> Why here we need this, can you explain more about this?

I fixed this.
Please refer to the up-to-date patch that I've been testing.

https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev-test&id=6976ab59090395014368296f154426c9311d69dc
https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev-test&id=65f770f2ead7dfdf661b2da49af1aa814b662c93

Thanks,

> 
> Thanks.
> 
> > 
> > err = -EAGAIN;
> > if (f2fs_has_inline_data(inode)) {
> > @@ -1457,6 +1459,7 @@ static int __write_data_page(struct page *page, bool 
> > *submitted,
> > if (wbc->for_reclaim) {
> > f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
> > DATA, WRITE);
> > +   clear_inode_flag(inode, FI_HOT_DATA);
> > remove_dirty_inode(inode);
> > submitted = NULL;
> > }
> > @@ -1511,6 +1514,12 @@ static int f2fs_write_cache_pages(struct 
> > address_space *mapping,
> > 
> > pagevec_init(&pvec, 0);
> > 
> > +   if (get_dirty_pages(mapping->host) <=
> > +   SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
> > +   set_inode_flag(mapping->host, FI_HOT_DATA);
> > +   else
> > +   clear_inode_flag(mapping->host, FI_HOT_DATA);
> > +
> > if (wbc->range_cyclic) {
> > writeback_index = mapping->writeback_index; /* prev offset */
> > index = writeback_index;
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 5a49518ee786..32d6f674c114 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -678,6 +678,7 @@ struct f2fs_sm_info {
> > unsigned int ipu_policy;/* in-place-update policy */
> > unsigned int min_ipu_util;  /* in-place-update threshold */
> > unsigned int min_fsync_blocks;  /* threshold for fsync */
> > +   unsigned int min_hot_blocks;/* threshold for hot block allocation */
> > 
> > /* for flush command control */
> > struct flush_cmd_control *fcc_info;
> > @@ -1717,6 +1718,7 @@ enum {
> > FI_DO_DEFRAG,   /* indicate defragment is running */
> > FI_DIRTY_FILE,  /* indicate regular/symlink has dirty pages */
> > FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
> > +   FI_HOT_DATA,/* indicate file is hot */
> >  };
> > 
> >  static inline void __mark_inode_dirty_flag(struct inode *inode,
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index b5b2a4745328..bff3f3bc7827 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -1841,18 +1841,16 @@ static int __get_segment_type_6(struct page *page, 
> > enum page_type p_type)
> > if (p_type == DATA) {
> > struct inode *inode = page->mapping->host;
> > 
> > -   if (S_ISDIR(inode->i_mode))
> > -   return CURSEG_HOT_DATA;
> > -   else if (is_cold_data(page) || file_is_cold(inode))
> > +   if (is_cold_data(page) || file_is_cold(inode))
> > return CURSEG_COLD_DATA;
> > -   else
> > -   return CURSEG_WARM_DATA;
> > +   if (is_inode_flag_set(inode, FI_HOT_DATA))
> > +   return CURSEG_HOT_DATA;
> > +   return CURSEG_WARM_DATA;
> > } else {
> > if (IS_DNODE(page))
> > return is_cold_node(page) ? CURSEG_WARM_NODE :
> > CURSEG_HOT_NODE;
> > -   else
> > -   return CURSEG_COLD_NODE;
> > +   return CURSEG_COLD_NODE;
> > }
> >  }
> > 
> > @@ -2959,6 +2957,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
> > sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> > sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> > sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> > +   sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> > 
> > sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
> > 
> > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h

Re: [f2fs-dev] [PATCH 1/2] f2fs: write small sized IO to hot log

2017-03-30 Thread Jaegeuk Kim
On 03/31, heyunlei wrote:
> Hi Jaegeuk,
> 
> On 2017/3/30 4:48, Jaegeuk Kim wrote:
> > > It would be better to split small and large IOs separately in order to get more
> > consecutive big writes.
> > 
> > The default threshold is set to 64KB, but configurable by 
> > sysfs/min_hot_blocks.
> > 
> > Signed-off-by: Jaegeuk Kim 
> > ---
> >  fs/f2fs/data.c|  9 +
> >  fs/f2fs/f2fs.h|  2 ++
> >  fs/f2fs/segment.c | 13 ++---
> >  fs/f2fs/segment.h |  1 +
> >  fs/f2fs/super.c   |  2 ++
> >  5 files changed, 20 insertions(+), 7 deletions(-)
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 090413236b27..8f36080b47c4 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1432,6 +1432,8 @@ static int __write_data_page(struct page *page, bool 
> > *submitted,
> > need_balance_fs = true;
> > else if (has_not_enough_free_secs(sbi, 0, 0))
> > goto redirty_out;
> > +   else
> > +   set_inode_flag(inode, FI_HOT_DATA);
> 
> Why here we need this, can you explain more about this?

I fixed this.
Please refer to the up-to-date patch that I've been testing.

https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev-test&id=6976ab59090395014368296f154426c9311d69dc
https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev-test&id=65f770f2ead7dfdf661b2da49af1aa814b662c93

Thanks,

> 
> Thanks.
> 
> > 
> > err = -EAGAIN;
> > if (f2fs_has_inline_data(inode)) {
> > @@ -1457,6 +1459,7 @@ static int __write_data_page(struct page *page, bool 
> > *submitted,
> > if (wbc->for_reclaim) {
> > f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
> > DATA, WRITE);
> > +   clear_inode_flag(inode, FI_HOT_DATA);
> > remove_dirty_inode(inode);
> > submitted = NULL;
> > }
> > @@ -1511,6 +1514,12 @@ static int f2fs_write_cache_pages(struct 
> > address_space *mapping,
> > 
> > pagevec_init(, 0);
> > 
> > +   if (get_dirty_pages(mapping->host) <=
> > +   SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
> > +   set_inode_flag(mapping->host, FI_HOT_DATA);
> > +   else
> > +   clear_inode_flag(mapping->host, FI_HOT_DATA);
> > +
> > if (wbc->range_cyclic) {
> > writeback_index = mapping->writeback_index; /* prev offset */
> > index = writeback_index;
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 5a49518ee786..32d6f674c114 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -678,6 +678,7 @@ struct f2fs_sm_info {
> > unsigned int ipu_policy;/* in-place-update policy */
> > unsigned int min_ipu_util;  /* in-place-update threshold */
> > unsigned int min_fsync_blocks;  /* threshold for fsync */
> > +   unsigned int min_hot_blocks;/* threshold for hot block allocation */
> > 
> > /* for flush command control */
> > struct flush_cmd_control *fcc_info;
> > @@ -1717,6 +1718,7 @@ enum {
> > FI_DO_DEFRAG,   /* indicate defragment is running */
> > FI_DIRTY_FILE,  /* indicate regular/symlink has dirty pages */
> > FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
> > +   FI_HOT_DATA,/* indicate file is hot */
> >  };
> > 
> >  static inline void __mark_inode_dirty_flag(struct inode *inode,
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index b5b2a4745328..bff3f3bc7827 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -1841,18 +1841,16 @@ static int __get_segment_type_6(struct page *page, 
> > enum page_type p_type)
> > if (p_type == DATA) {
> > struct inode *inode = page->mapping->host;
> > 
> > -   if (S_ISDIR(inode->i_mode))
> > -   return CURSEG_HOT_DATA;
> > -   else if (is_cold_data(page) || file_is_cold(inode))
> > +   if (is_cold_data(page) || file_is_cold(inode))
> > return CURSEG_COLD_DATA;
> > -   else
> > -   return CURSEG_WARM_DATA;
> > +   if (is_inode_flag_set(inode, FI_HOT_DATA))
> > +   return CURSEG_HOT_DATA;
> > +   return CURSEG_WARM_DATA;
> > } else {
> > if (IS_DNODE(page))
> > return is_cold_node(page) ? CURSEG_WARM_NODE :
> > CURSEG_HOT_NODE;
> > -   else
> > -   return CURSEG_COLD_NODE;
> > +   return CURSEG_COLD_NODE;
> > }
> >  }
> > 
> > @@ -2959,6 +2957,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
> > sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> > sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> > sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> > +   sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> > 
> > sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
> > 
> > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > index 

Re: [f2fs-dev] [PATCH 2/2] f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE

2017-03-30 Thread heyunlei

Hi Jaegeuk,

Can we split in place update bios into single sbi->f2fs_bio_info for more page
merged in out place update? This case can be shown as below:

in place update submit a bio with one page
out place update submit a bio with one page
in place update submit a bio with one page
out place update submit a bio with one page
... ...

just like WB_SYNC_ALL and WB_SYNC_NONE case.

Thanks.

On 2017/3/30 4:48, Jaegeuk Kim wrote:

If two threads try to flush dirty pages in different inodes respectively,
f2fs_write_data_pages() will produce WRITE and WRITE_SYNC one at a time,
resulting in a lot of 4KB separated IOs.

So, this patch gives higher priority to WB_SYNC_ALL IOs and gathers write
IOs with a big WRITE_SYNC'ed bio.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c  | 15 +--
 fs/f2fs/f2fs.h  |  3 +++
 fs/f2fs/super.c |  2 ++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8f36080b47c4..b1cac6d85bcb 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1605,8 +1605,10 @@ static int f2fs_write_cache_pages(struct address_space 
*mapping,
last_idx = page->index;
}

-   if (--wbc->nr_to_write <= 0 &&
-   wbc->sync_mode == WB_SYNC_NONE) {
+   /* give a priority to WB_SYNC threads */
+   if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
+   --wbc->nr_to_write <= 0) &&
+   wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
@@ -1662,9 +1664,18 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,

trace_f2fs_writepages(mapping->host, wbc, DATA);

+   /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
+   if (wbc->sync_mode == WB_SYNC_ALL)
+   atomic_inc(&sbi->wb_sync_req);
+   else if (atomic_read(&sbi->wb_sync_req))
+   goto skip_write;
+
blk_start_plug();
ret = f2fs_write_cache_pages(mapping, wbc);
blk_finish_plug();
+
+   if (wbc->sync_mode == WB_SYNC_ALL)
+   atomic_dec(&sbi->wb_sync_req);
/*
 * if some pages were truncated, we cannot guarantee its mapping->host
 * to detect pending bios.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 32d6f674c114..fd39db681226 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -888,6 +888,9 @@ struct f2fs_sb_info {
/* # of allocated blocks */
struct percpu_counter alloc_valid_block_count;

+   /* writeback control */
+   atomic_t wb_sync_req;   /* count # of WB_SYNC threads */
+
/* valid inode count */
struct percpu_counter total_valid_inode_count;

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2d78f3c76d18..cb65e6d0d275 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1566,6 +1566,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);

+   atomic_set(&sbi->wb_sync_req, 0);
+
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
mutex_init(&sbi->wio_mutex[NODE]);





Re: [PATCH V2 net-next 1/7] ptr_ring: introduce batch dequeuing

2017-03-30 Thread Jason Wang



On 2017年03月30日 21:53, Michael S. Tsirkin wrote:

On Thu, Mar 30, 2017 at 03:22:24PM +0800, Jason Wang wrote:

This patch introduces a batched version of consuming, so that a consumer can
dequeue more than one pointer from the ring at a time. We don't care
about the reordering of reads here, so there is no need for a compiler barrier.

Signed-off-by: Jason Wang 
---
  include/linux/ptr_ring.h | 65 
  1 file changed, 65 insertions(+)

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6c70444..2be0f350 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -247,6 +247,22 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
return ptr;
  }
  
+static inline int __ptr_ring_consume_batched(struct ptr_ring *r,

+void **array, int n)

Can we use a shorter name? ptr_ring_consume_batch?


Ok, but at least we need to keep the prefix since there's a locked version.






+{
+   void *ptr;
+   int i;
+
+   for (i = 0; i < n; i++) {
+   ptr = __ptr_ring_consume(r);
+   if (!ptr)
+   break;
+   array[i] = ptr;
+   }
+
+   return i;
+}
+
  /*
   * Note: resize (below) nests producer lock within consumer lock, so if you
   * call this in interrupt or BH context, you must disable interrupts/BH when

I'd like to add a code comment here explaining why we don't
care about cpu or compiler reordering. And I think the reason is
in the way you use this API: in vhost it does not matter
if you get less entries than present in the ring.
That's ok but needs to be noted
in a code comment so people use this function correctly.


Interesting, but I still think it's not necessary.

If the consumer is busy polling, it will eventually get the entries. 
If the consumer needs notification from the producer, it should drain the 
queue, which means it needs to enable notification before the last 
consuming call; otherwise it is a bug. The batch consuming function in 
this patch guarantees that it returns at least one pointer if there are many; 
this looks sufficient for correctness?


Thanks



Also, I think you need to repeat the comment about cpu_relax
near this function: if someone uses it in a loop,
a compiler barrier is needed to prevent compiler from
optimizing it out.

I note that ptr_ring_consume currently lacks any of these
comments so I'm ok with merging as is, and I'll add
documentation on top.
Like this perhaps?

/* Consume up to n entries and return the number of entries consumed
  * or 0 on ring empty.
  * Note: this might return early with less entries than present in the
  * ring.
  * Note: callers invoking this in a loop must use a compiler barrier,
  * for example cpu_relax(). Callers must take consumer_lock
  * if the ring is ever resized - see e.g. ptr_ring_consume_batch.
  */




@@ -297,6 +313,55 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
return ptr;
  }
  
+static inline int ptr_ring_consume_batched(struct ptr_ring *r,

+  void **array, int n)
+{
+   int ret;
+
+   spin_lock(&r->consumer_lock);
+   ret = __ptr_ring_consume_batched(r, array, n);
+   spin_unlock(&r->consumer_lock);
+
+   return ret;
+}
+
+static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
+  void **array, int n)
+{
+   int ret;
+
+   spin_lock_irq(&r->consumer_lock);
+   ret = __ptr_ring_consume_batched(r, array, n);
+   spin_unlock_irq(&r->consumer_lock);
+
+   return ret;
+}
+
+static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
+  void **array, int n)
+{
+   unsigned long flags;
+   int ret;
+
+   spin_lock_irqsave(&r->consumer_lock, flags);
+   ret = __ptr_ring_consume_batched(r, array, n);
+   spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+   return ret;
+}
+
+static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
+ void **array, int n)
+{
+   int ret;
+
+   spin_lock_bh(&r->consumer_lock);
+   ret = __ptr_ring_consume_batched(r, array, n);
+   spin_unlock_bh(&r->consumer_lock);
+
+   return ret;
+}
+
  /* Cast to structure type and call a function without discarding from FIFO.
   * Function must return a value.
   * Callers must take consumer_lock.
--
2.7.4




Re: [f2fs-dev] [PATCH 2/2] f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE

2017-03-30 Thread heyunlei

Hi Jaegeuk,

Can we split in place update bios into single sbi->f2fs_bio_info for more page
merged in out place update? This case can be shown as below:

in place update submit a bio with one page
out place update submit a bio with one page
in place update submit a bio with one page
out place update submit a bio with one page
... ...

just like WB_SYNC_ALL and WB_SYNC_NONE case.

Thanks.

On 2017/3/30 4:48, Jaegeuk Kim wrote:

If two threads try to flush dirty pages in different inodes respectively,
f2fs_write_data_pages() will produce WRITE and WRITE_SYNC one at a time,
resulting in a lot of 4KB separated IOs.

So, this patch gives higher priority to WB_SYNC_ALL IOs and gathers write
IOs with a big WRITE_SYNC'ed bio.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c  | 15 +--
 fs/f2fs/f2fs.h  |  3 +++
 fs/f2fs/super.c |  2 ++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8f36080b47c4..b1cac6d85bcb 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1605,8 +1605,10 @@ static int f2fs_write_cache_pages(struct address_space 
*mapping,
last_idx = page->index;
}

-   if (--wbc->nr_to_write <= 0 &&
-   wbc->sync_mode == WB_SYNC_NONE) {
+   /* give a priority to WB_SYNC threads */
+   if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
+   --wbc->nr_to_write <= 0) &&
+   wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
@@ -1662,9 +1664,18 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,

trace_f2fs_writepages(mapping->host, wbc, DATA);

+   /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
+   if (wbc->sync_mode == WB_SYNC_ALL)
+   atomic_inc(&sbi->wb_sync_req);
+   else if (atomic_read(&sbi->wb_sync_req))
+   goto skip_write;
+
blk_start_plug();
ret = f2fs_write_cache_pages(mapping, wbc);
blk_finish_plug();
+
+   if (wbc->sync_mode == WB_SYNC_ALL)
+   atomic_dec(&sbi->wb_sync_req);
/*
 * if some pages were truncated, we cannot guarantee its mapping->host
 * to detect pending bios.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 32d6f674c114..fd39db681226 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -888,6 +888,9 @@ struct f2fs_sb_info {
/* # of allocated blocks */
struct percpu_counter alloc_valid_block_count;

+   /* writeback control */
+   atomic_t wb_sync_req;   /* count # of WB_SYNC threads */
+
/* valid inode count */
struct percpu_counter total_valid_inode_count;

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2d78f3c76d18..cb65e6d0d275 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1566,6 +1566,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);

+   atomic_set(&sbi->wb_sync_req, 0);
+
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
mutex_init(&sbi->wio_mutex[NODE]);





Re: [PATCH V2 net-next 1/7] ptr_ring: introduce batch dequeuing

2017-03-30 Thread Jason Wang



On 2017年03月30日 21:53, Michael S. Tsirkin wrote:

On Thu, Mar 30, 2017 at 03:22:24PM +0800, Jason Wang wrote:

This patch introduces a batched version of consuming, so that a consumer can
dequeue more than one pointer from the ring at a time. We don't care
about the reordering of reads here, so there is no need for a compiler barrier.

Signed-off-by: Jason Wang 
---
  include/linux/ptr_ring.h | 65 
  1 file changed, 65 insertions(+)

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6c70444..2be0f350 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -247,6 +247,22 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
return ptr;
  }
  
+static inline int __ptr_ring_consume_batched(struct ptr_ring *r,

+void **array, int n)

Can we use a shorter name? ptr_ring_consume_batch?


Ok, but at least we need to keep the prefix since there's a locked version.






+{
+   void *ptr;
+   int i;
+
+   for (i = 0; i < n; i++) {
+   ptr = __ptr_ring_consume(r);
+   if (!ptr)
+   break;
+   array[i] = ptr;
+   }
+
+   return i;
+}
+
  /*
   * Note: resize (below) nests producer lock within consumer lock, so if you
   * call this in interrupt or BH context, you must disable interrupts/BH when

I'd like to add a code comment here explaining why we don't
care about cpu or compiler reordering. And I think the reason is
in the way you use this API: in vhost it does not matter
if you get less entries than present in the ring.
That's ok but needs to be noted
in a code comment so people use this function correctly.


Interesting, but I still think it's not necessary.

If the consumer is busy polling, it will eventually get the entries. 
If the consumer needs notification from the producer, it should drain the 
queue, which means it needs to enable notification before the last 
consuming call; otherwise it is a bug. The batch consuming function in 
this patch guarantees that it returns at least one pointer if there are many; 
this looks sufficient for correctness?


Thanks



Also, I think you need to repeat the comment about cpu_relax
near this function: if someone uses it in a loop,
a compiler barrier is needed to prevent compiler from
optimizing it out.

I note that ptr_ring_consume currently lacks any of these
comments so I'm ok with merging as is, and I'll add
documentation on top.
Like this perhaps?

/* Consume up to n entries and return the number of entries consumed
  * or 0 on ring empty.
  * Note: this might return early with less entries than present in the
  * ring.
  * Note: callers invoking this in a loop must use a compiler barrier,
  * for example cpu_relax(). Callers must take consumer_lock
  * if the ring is ever resized - see e.g. ptr_ring_consume_batch.
  */




@@ -297,6 +313,55 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
return ptr;
  }
  
+static inline int ptr_ring_consume_batched(struct ptr_ring *r,

+  void **array, int n)
+{
+   int ret;
+
+   spin_lock(&r->consumer_lock);
+   ret = __ptr_ring_consume_batched(r, array, n);
+   spin_unlock(&r->consumer_lock);
+
+   return ret;
+}
+
+static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
+  void **array, int n)
+{
+   int ret;
+
+   spin_lock_irq(&r->consumer_lock);
+   ret = __ptr_ring_consume_batched(r, array, n);
+   spin_unlock_irq(&r->consumer_lock);
+
+   return ret;
+}
+
+static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
+  void **array, int n)
+{
+   unsigned long flags;
+   int ret;
+
+   spin_lock_irqsave(&r->consumer_lock, flags);
+   ret = __ptr_ring_consume_batched(r, array, n);
+   spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+   return ret;
+}
+
+static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
+ void **array, int n)
+{
+   int ret;
+
+   spin_lock_bh(&r->consumer_lock);
+   ret = __ptr_ring_consume_batched(r, array, n);
+   spin_unlock_bh(&r->consumer_lock);
+
+   return ret;
+}
+
  /* Cast to structure type and call a function without discarding from FIFO.
   * Function must return a value.
   * Callers must take consumer_lock.
--
2.7.4




Re: [PATCH 3/6] mm: remove return value from init_currently_empty_zone

2017-03-30 Thread Hillf Danton
On March 30, 2017 7:55 PM Michal Hocko wrote: 
> 
> From: Michal Hocko 
> 
> init_currently_empty_zone doesn't have any error to return yet it is
> still an int and callers try to be defensive and try to handle potential
> error. Remove this nonsense and simplify all callers.
> 
It is already cut off in 1/6 in this series?



> -/* Can fail with -ENOMEM from allocating a wait table with vmalloc() or
> - * alloc_bootmem_node_nopanic()/memblock_virt_alloc_node_nopanic() */
> -static int __ref ensure_zone_is_initialized(struct zone *zone,
> +static void __ref ensure_zone_is_initialized(struct zone *zone,
>   unsigned long start_pfn, unsigned long num_pages)
>  {
> - if (zone_is_empty(zone))
> - return init_currently_empty_zone(zone, start_pfn, num_pages);
> -
> - return 0;
> + if (!zone_is_empty(zone))
> + init_currently_empty_zone(zone, start_pfn, num_pages);
>  }
Semantic change added?

Hillf



Re: [PATCH 3/6] mm: remove return value from init_currently_empty_zone

2017-03-30 Thread Hillf Danton
On March 30, 2017 7:55 PM Michal Hocko wrote: 
> 
> From: Michal Hocko 
> 
> init_currently_empty_zone doesn't have any error to return yet it is
> still an int and callers try to be defensive and try to handle potential
> error. Remove this nonsense and simplify all callers.
> 
It is already cut off in 1/6 in this series?



> -/* Can fail with -ENOMEM from allocating a wait table with vmalloc() or
> - * alloc_bootmem_node_nopanic()/memblock_virt_alloc_node_nopanic() */
> -static int __ref ensure_zone_is_initialized(struct zone *zone,
> +static void __ref ensure_zone_is_initialized(struct zone *zone,
>   unsigned long start_pfn, unsigned long num_pages)
>  {
> - if (zone_is_empty(zone))
> - return init_currently_empty_zone(zone, start_pfn, num_pages);
> -
> - return 0;
> + if (!zone_is_empty(zone))
> + init_currently_empty_zone(zone, start_pfn, num_pages);
>  }
Semantic change added?

Hillf



Re: [RESEND PATCH 2/2] sched/fair: Optimize __update_sched_avg()

2017-03-30 Thread Yuyang Du
On Fri, Mar 31, 2017 at 03:13:55AM +0800, Yuyang Du wrote:
> On Thu, Mar 30, 2017 at 04:14:28PM +0200, Peter Zijlstra wrote:
> > On Thu, Mar 30, 2017 at 02:16:58PM +0200, Peter Zijlstra wrote:
> > > On Thu, Mar 30, 2017 at 04:21:08AM -0700, Paul Turner wrote:
> > 
> > > > > +
> > > > > +   if (unlikely(periods >= LOAD_AVG_MAX_N))
> > > > > return LOAD_AVG_MAX;
> > 
> > > > 
> > > > Is this correct in the iterated periods > LOAD_AVG_MAX_N case?
> > > > I don't think the decay above is guaranteed to return these to zero.
> > > 
> > > Ah!
> > > 
> > > Indeed, so decay_load() needs LOAD_AVG_PERIOD * 63 before it truncates
> > > > to 0, because every LOAD_AVG_PERIOD we halve the value; lose 1 bit; so
> > > 63 of those and we're 0.
> > > 
> > > But __accumulate_sum() OTOH returns LOAD_AVG_MAX after only
> > > LOAD_AVG_MAX_N, which < LOAD_AVG_PERIOD * 63.
> > > 
> > > So yes, combined we exceed LOAD_AVG_MAX, which is bad. Let me think what
> > > to do about that.
> > 
> > 
> > So at the very least it should be decay_load(LOAD_AVG_MAX, 1) (aka
> > LOAD_AVG_MAX - 1024), but that still doesn't account for the !0
> > decay_load() of the first segment.
> > 
> > I'm thinking that we can compute the middle segment, by taking the max
> > value and chopping off the ends, like:
> > 
> > 
> >   c2 = 1024 \sum_{n=1}^{p} y^n
> > 
> >      = 1024 ( \sum_{n=0}^{\infty} y^n - \sum_{n=p}^{\infty} y^n - y^0 )
>  
> It looks surprisingly kinda works :)
>  
> > +   c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
> ~
> But, I'm not sure   this is what you want (just assume p==0).
> 

Oh, what I meant is when p != 0, actually p>=1.

And thinking about it for a while, it's really what you want, brilliant :)


Re: [RESEND PATCH 2/2] sched/fair: Optimize __update_sched_avg()

2017-03-30 Thread Yuyang Du
On Fri, Mar 31, 2017 at 03:13:55AM +0800, Yuyang Du wrote:
> On Thu, Mar 30, 2017 at 04:14:28PM +0200, Peter Zijlstra wrote:
> > On Thu, Mar 30, 2017 at 02:16:58PM +0200, Peter Zijlstra wrote:
> > > On Thu, Mar 30, 2017 at 04:21:08AM -0700, Paul Turner wrote:
> > 
> > > > > +
> > > > > +   if (unlikely(periods >= LOAD_AVG_MAX_N))
> > > > > return LOAD_AVG_MAX;
> > 
> > > > 
> > > > Is this correct in the iterated periods > LOAD_AVG_MAX_N case?
> > > > I don't think the decay above is guaranteed to return these to zero.
> > > 
> > > Ah!
> > > 
> > > Indeed, so decay_load() needs LOAD_AVG_PERIOD * 63 before it truncates
> > > > to 0, because every LOAD_AVG_PERIOD we halve the value; lose 1 bit; so
> > > 63 of those and we're 0.
> > > 
> > > But __accumulate_sum() OTOH returns LOAD_AVG_MAX after only
> > > LOAD_AVG_MAX_N, which < LOAD_AVG_PERIOD * 63.
> > > 
> > > So yes, combined we exceed LOAD_AVG_MAX, which is bad. Let me think what
> > > to do about that.
> > 
> > 
> > So at the very least it should be decay_load(LOAD_AVG_MAX, 1) (aka
> > LOAD_AVG_MAX - 1024), but that still doesn't account for the !0
> > decay_load() of the first segment.
> > 
> > I'm thinking that we can compute the middle segment, by taking the max
> > value and chopping off the ends, like:
> > 
> > 
> >   c2 = 1024 \sum_{n=1}^{p} y^n
> > 
> >      = 1024 ( \sum_{n=0}^{\infty} y^n - \sum_{n=p}^{\infty} y^n - y^0 )
>  
> It looks surprisingly kinda works :)
>  
> > +   c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
> ~
> But, I'm not sure   this is what you want (just assume p==0).
> 

Oh, what I meant is when p != 0, actually p>=1.

And thinking about it for a while, it's really what you want, brilliant :)


Re: [PATCH 4.10 00/17] 4.10.8-stable review

2017-03-30 Thread Guenter Roeck

On 03/30/2017 03:00 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.10.8 release.
There are 17 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sat Apr  1 09:59:07 UTC 2017.
Anything received after that time might be too late.



Build results:
total: 149 pass: 149 fail: 0
Qemu test results:
total: 122 pass: 122 fail: 0

Details are available at http://kerneltests.org/builders.

Guenter



Re: [PATCH 4.10 00/17] 4.10.8-stable review

2017-03-30 Thread Guenter Roeck

On 03/30/2017 03:00 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.10.8 release.
There are 17 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sat Apr  1 09:59:07 UTC 2017.
Anything received after that time might be too late.



Build results:
total: 149 pass: 149 fail: 0
Qemu test results:
total: 122 pass: 122 fail: 0

Details are available at http://kerneltests.org/builders.

Guenter



Re: [PATCH 4.9 00/16] 4.9.20-stable review

2017-03-30 Thread Guenter Roeck

On 03/30/2017 03:15 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.9.20 release.
There are 16 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sat Apr  1 10:04:23 UTC 2017.
Anything received after that time might be too late.



Build results:
total: 149 pass: 149 fail: 0
Qemu test results:
total: 122 pass: 122 fail: 0

Details are available at http://kerneltests.org/builders.

Guenter



Re: [PATCH 4.9 00/16] 4.9.20-stable review

2017-03-30 Thread Guenter Roeck

On 03/30/2017 03:15 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.9.20 release.
There are 16 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sat Apr  1 10:04:23 UTC 2017.
Anything received after that time might be too late.



Build results:
total: 149 pass: 149 fail: 0
Qemu test results:
total: 122 pass: 122 fail: 0

Details are available at http://kerneltests.org/builders.

Guenter



Re: [PATCH 4.4 00/14] 4.4.59-stable review

2017-03-30 Thread Guenter Roeck

On 03/30/2017 02:58 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.4.59 release.
There are 14 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sat Apr  1 09:49:26 UTC 2017.
Anything received after that time might be too late.



Build results:
total: 149 pass: 149 fail: 0
Qemu test results:
total: 115 pass: 115 fail: 0

Details are available at http://kerneltests.org/builders.

Guenter



Re: [PATCH 4.4 00/14] 4.4.59-stable review

2017-03-30 Thread Guenter Roeck

On 03/30/2017 02:58 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.4.59 release.
There are 14 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sat Apr  1 09:49:26 UTC 2017.
Anything received after that time might be too late.



Build results:
total: 149 pass: 149 fail: 0
Qemu test results:
total: 115 pass: 115 fail: 0

Details are available at http://kerneltests.org/builders.

Guenter



[PATCH] treewide: Correct diffrent[iate] and banlance typos

2017-03-30 Thread Joe Perches
Add these misspellings to scripts/spelling.txt too

Signed-off-by: Joe Perches 
---
 drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h | 2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c  | 2 +-
 drivers/net/ethernet/hisilicon/hns/hns_enet.c   | 2 +-
 drivers/net/ethernet/qlogic/qed/qed_int.c   | 2 +-
 drivers/net/ethernet/qlogic/qed/qed_main.c  | 2 +-
 drivers/net/ethernet/qlogic/qed/qed_sriov.c | 2 +-
 include/linux/mlx4/device.h | 2 +-
 scripts/spelling.txt| 3 +++
 8 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h 
b/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h
index 354ec07eae87..23ae72468025 100644
--- a/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h
+++ b/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h
@@ -70,7 +70,7 @@
 * (3) both long and short but short preferred and long only when necesarry
 *
 * These modes must be selected compile time via compile switches.
-* Compile switch settings for the diffrent modes:
+* Compile switch settings for the different modes:
 * (1) DRXDAPFASI_LONG_ADDR_ALLOWED=0, DRXDAPFASI_SHORT_ADDR_ALLOWED=1
 * (2) DRXDAPFASI_LONG_ADDR_ALLOWED=1, DRXDAPFASI_SHORT_ADDR_ALLOWED=0
 * (3) DRXDAPFASI_LONG_ADDR_ALLOWED=1, DRXDAPFASI_SHORT_ADDR_ALLOWED=1
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index cea6bdcde33f..8baf9d3eb4b1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -1591,7 +1591,7 @@ static int __bnx2x_vlan_mac_execute_step(struct bnx2x *bp,
if (rc != 0) {
__bnx2x_vlan_mac_h_pend(bp, o, *ramrod_flags);
 
-   /* Calling function should not diffrentiate between this case
+   /* Calling function should not differentiate between this case
 * and the case in which there is already a pending ramrod
 */
rc = 1;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c 
b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index fca37e2c7f01..e70324f4fe84 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1207,7 +1207,7 @@ static void hns_set_irq_affinity(struct hns_nic_priv 
*priv)
if (!alloc_cpumask_var(, GFP_KERNEL))
return;
 
-   /*diffrent irq banlance for 16core and 32core*/
+   /* different irq balance for 16core and 32core */
if (h->q_num == num_possible_cpus()) {
for (i = 0; i < h->q_num * 2; i++) {
rd = >ring_data[i];
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c 
b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 84310b60849b..c6b348f00e7b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -3057,7 +3057,7 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct 
qed_ptt *p_ptt)
 
/* There's a possibility the igu_sb_cnt_iov doesn't properly reflect
 * the number of VF SBs [especially for first VF on engine, as we can't
-* diffrentiate between empty entries and its entries].
+* differentiate between empty entries and its entries].
 * Since we don't really support more SBs than VFs today, prevent any
 * such configuration by sanitizing the number of SBs to equal the
 * number of VFs.
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c 
b/drivers/net/ethernet/qlogic/qed/qed_main.c
index d4edb993b1b0..b595f7dd4a58 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -951,7 +951,7 @@ static int qed_slowpath_start(struct qed_dev *cdev,
if (rc)
goto err2;
 
-   /* First Dword used to diffrentiate between various sources */
+   /* First Dword used to differentiate between various sources */
data = cdev->firmware->data + sizeof(u32);
 
qed_dbg_pf_init(cdev);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c 
b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 18fc6e62ca41..a69774b19712 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -625,7 +625,7 @@ int qed_iov_hw_info(struct qed_hwfn *p_hwfn)
 *  - If !ARI, VFs would start on next device.
 *so offset - (256 - pf_id) would provide the number.
 * Utilize the fact that (256 - pf_id) is achieved only by later
-* to diffrentiate between the two.
+* to differentiate between the two.
 */
 
if (p_hwfn->cdev->p_iov_info->offset < (256 - p_hwfn->abs_pf_id)) {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1beb1ec2fbdf..eb1a51a6617b 100644
--- 

[PATCH] treewide: Correct diffrent[iate] and banlance typos

2017-03-30 Thread Joe Perches
Add these misspellings to scripts/spelling.txt too

Signed-off-by: Joe Perches 
---
 drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h | 2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c  | 2 +-
 drivers/net/ethernet/hisilicon/hns/hns_enet.c   | 2 +-
 drivers/net/ethernet/qlogic/qed/qed_int.c   | 2 +-
 drivers/net/ethernet/qlogic/qed/qed_main.c  | 2 +-
 drivers/net/ethernet/qlogic/qed/qed_sriov.c | 2 +-
 include/linux/mlx4/device.h | 2 +-
 scripts/spelling.txt| 3 +++
 8 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h 
b/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h
index 354ec07eae87..23ae72468025 100644
--- a/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h
+++ b/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h
@@ -70,7 +70,7 @@
 * (3) both long and short but short preferred and long only when necesarry
 *
 * These modes must be selected compile time via compile switches.
-* Compile switch settings for the diffrent modes:
+* Compile switch settings for the different modes:
 * (1) DRXDAPFASI_LONG_ADDR_ALLOWED=0, DRXDAPFASI_SHORT_ADDR_ALLOWED=1
 * (2) DRXDAPFASI_LONG_ADDR_ALLOWED=1, DRXDAPFASI_SHORT_ADDR_ALLOWED=0
 * (3) DRXDAPFASI_LONG_ADDR_ALLOWED=1, DRXDAPFASI_SHORT_ADDR_ALLOWED=1
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index cea6bdcde33f..8baf9d3eb4b1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -1591,7 +1591,7 @@ static int __bnx2x_vlan_mac_execute_step(struct bnx2x *bp,
if (rc != 0) {
__bnx2x_vlan_mac_h_pend(bp, o, *ramrod_flags);
 
-   /* Calling function should not diffrentiate between this case
+   /* Calling function should not differentiate between this case
 * and the case in which there is already a pending ramrod
 */
rc = 1;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c 
b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index fca37e2c7f01..e70324f4fe84 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1207,7 +1207,7 @@ static void hns_set_irq_affinity(struct hns_nic_priv 
*priv)
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
return;
 
-   /*diffrent irq banlance for 16core and 32core*/
+   /* different irq balance for 16core and 32core */
if (h->q_num == num_possible_cpus()) {
for (i = 0; i < h->q_num * 2; i++) {
rd = &priv->ring_data[i];
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c 
b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 84310b60849b..c6b348f00e7b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -3057,7 +3057,7 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct 
qed_ptt *p_ptt)
 
/* There's a possibility the igu_sb_cnt_iov doesn't properly reflect
 * the number of VF SBs [especially for first VF on engine, as we can't
-* diffrentiate between empty entries and its entries].
+* differentiate between empty entries and its entries].
 * Since we don't really support more SBs than VFs today, prevent any
 * such configuration by sanitizing the number of SBs to equal the
 * number of VFs.
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c 
b/drivers/net/ethernet/qlogic/qed/qed_main.c
index d4edb993b1b0..b595f7dd4a58 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -951,7 +951,7 @@ static int qed_slowpath_start(struct qed_dev *cdev,
if (rc)
goto err2;
 
-   /* First Dword used to diffrentiate between various sources */
+   /* First Dword used to differentiate between various sources */
data = cdev->firmware->data + sizeof(u32);
 
qed_dbg_pf_init(cdev);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c 
b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 18fc6e62ca41..a69774b19712 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -625,7 +625,7 @@ int qed_iov_hw_info(struct qed_hwfn *p_hwfn)
 *  - If !ARI, VFs would start on next device.
 *so offset - (256 - pf_id) would provide the number.
 * Utilize the fact that (256 - pf_id) is achieved only by later
-* to diffrentiate between the two.
+* to differentiate between the two.
 */
 
if (p_hwfn->cdev->p_iov_info->offset < (256 - p_hwfn->abs_pf_id)) {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1beb1ec2fbdf..eb1a51a6617b 100644
--- a/include/linux/mlx4/device.h

Re: [f2fs-dev] [PATCH 2/2] f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE

2017-03-30 Thread heyunlei

Hi Jaegeuk,

I tried this patch and found that it fixes the case below:

   kworker/u16:3-423   [002]    183.812347: submit_bio: kworker/u16:3(423): 
WRITE block 104749352 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812380: submit_bio: fio(2122): WRITE 
block 104749360 on mmcblk0p50 (24 sectors)
   kworker/u16:3-423   [002]    183.812388: submit_bio: kworker/u16:3(423): 
WRITE block 104749384 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812403: submit_bio: fio(2122): WRITE 
block 104749392 on mmcblk0p50 (8 sectors)
   kworker/u16:3-423   [002]    183.812404: submit_bio: kworker/u16:3(423): 
WRITE block 104749400 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812427: submit_bio: fio(2122): WRITE 
block 104749408 on mmcblk0p50 (16 sectors)
   kworker/u16:3-423   [002]    183.812429: submit_bio: kworker/u16:3(423): 
WRITE block 104749424 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812450: submit_bio: fio(2122): WRITE 
block 104749432 on mmcblk0p50 (16 sectors)
   kworker/u16:3-423   [002]    183.812455: submit_bio: kworker/u16:3(423): 
WRITE block 104749448 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812470: submit_bio: fio(2122): WRITE 
block 104749456 on mmcblk0p50 (8 sectors)
   kworker/u16:3-423   [002]    183.812476: submit_bio: kworker/u16:3(423): 
WRITE block 104749464 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812492: submit_bio: fio(2122): WRITE 
block 104749472 on mmcblk0p50 (16 sectors)
   kworker/u16:3-423   [002]    183.812497: submit_bio: kworker/u16:3(423): 
WRITE block 104749488 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812512: submit_bio: fio(2122): WRITE 
block 104749496 on mmcblk0p50 (8 sectors)
   kworker/u16:3-423   [002]    183.812514: submit_bio: kworker/u16:3(423): 
WRITE block 104749504 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812532: submit_bio: fio(2122): WRITE 
block 104749512 on mmcblk0p50 (16 sectors)

   ... ...

Thanks.

On 2017/3/30 4:48, Jaegeuk Kim wrote:

If two threads try to flush dirty pages in different inodes respectively,
f2fs_write_data_pages() will produce WRITE and WRITE_SYNC one at a time,
resulting in a lot of 4KB separated IOs.

So, this patch gives higher priority to WB_SYNC_ALL IOs and gathers write
IOs with a big WRITE_SYNC'ed bio.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c  | 15 +--
 fs/f2fs/f2fs.h  |  3 +++
 fs/f2fs/super.c |  2 ++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8f36080b47c4..b1cac6d85bcb 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1605,8 +1605,10 @@ static int f2fs_write_cache_pages(struct address_space 
*mapping,
last_idx = page->index;
}

-   if (--wbc->nr_to_write <= 0 &&
-   wbc->sync_mode == WB_SYNC_NONE) {
+   /* give a priority to WB_SYNC threads */
+   if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
+   --wbc->nr_to_write <= 0) &&
+   wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
@@ -1662,9 +1664,18 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,

trace_f2fs_writepages(mapping->host, wbc, DATA);

+   /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
+   if (wbc->sync_mode == WB_SYNC_ALL)
+   atomic_inc(&sbi->wb_sync_req);
+   else if (atomic_read(&sbi->wb_sync_req))
+   goto skip_write;
+
blk_start_plug(&plug);
ret = f2fs_write_cache_pages(mapping, wbc);
blk_finish_plug(&plug);
+
+   if (wbc->sync_mode == WB_SYNC_ALL)
+   atomic_dec(&sbi->wb_sync_req);
/*
 * if some pages were truncated, we cannot guarantee its mapping->host
 * to detect pending bios.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 32d6f674c114..fd39db681226 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -888,6 +888,9 @@ struct f2fs_sb_info {
/* # of allocated blocks */
struct percpu_counter alloc_valid_block_count;

+   /* writeback control */
+   atomic_t wb_sync_req;   /* count # of WB_SYNC threads */
+
/* valid inode count */
struct percpu_counter total_valid_inode_count;

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2d78f3c76d18..cb65e6d0d275 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1566,6 +1566,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);

+   atomic_set(&sbi->wb_sync_req, 0);
+
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);

Re: [f2fs-dev] [PATCH 2/2] f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE

2017-03-30 Thread heyunlei

Hi Jaegeuk,

I tried this patch and found that it fixes the case below:

   kworker/u16:3-423   [002]    183.812347: submit_bio: kworker/u16:3(423): 
WRITE block 104749352 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812380: submit_bio: fio(2122): WRITE 
block 104749360 on mmcblk0p50 (24 sectors)
   kworker/u16:3-423   [002]    183.812388: submit_bio: kworker/u16:3(423): 
WRITE block 104749384 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812403: submit_bio: fio(2122): WRITE 
block 104749392 on mmcblk0p50 (8 sectors)
   kworker/u16:3-423   [002]    183.812404: submit_bio: kworker/u16:3(423): 
WRITE block 104749400 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812427: submit_bio: fio(2122): WRITE 
block 104749408 on mmcblk0p50 (16 sectors)
   kworker/u16:3-423   [002]    183.812429: submit_bio: kworker/u16:3(423): 
WRITE block 104749424 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812450: submit_bio: fio(2122): WRITE 
block 104749432 on mmcblk0p50 (16 sectors)
   kworker/u16:3-423   [002]    183.812455: submit_bio: kworker/u16:3(423): 
WRITE block 104749448 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812470: submit_bio: fio(2122): WRITE 
block 104749456 on mmcblk0p50 (8 sectors)
   kworker/u16:3-423   [002]    183.812476: submit_bio: kworker/u16:3(423): 
WRITE block 104749464 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812492: submit_bio: fio(2122): WRITE 
block 104749472 on mmcblk0p50 (16 sectors)
   kworker/u16:3-423   [002]    183.812497: submit_bio: kworker/u16:3(423): 
WRITE block 104749488 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812512: submit_bio: fio(2122): WRITE 
block 104749496 on mmcblk0p50 (8 sectors)
   kworker/u16:3-423   [002]    183.812514: submit_bio: kworker/u16:3(423): 
WRITE block 104749504 on mmcblk0p50 (8 sectors)
 fio-2122  [003]    183.812532: submit_bio: fio(2122): WRITE 
block 104749512 on mmcblk0p50 (16 sectors)

   ... ...

Thanks.

On 2017/3/30 4:48, Jaegeuk Kim wrote:

If two threads try to flush dirty pages in different inodes respectively,
f2fs_write_data_pages() will produce WRITE and WRITE_SYNC one at a time,
resulting in a lot of 4KB separated IOs.

So, this patch gives higher priority to WB_SYNC_ALL IOs and gathers write
IOs with a big WRITE_SYNC'ed bio.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c  | 15 +--
 fs/f2fs/f2fs.h  |  3 +++
 fs/f2fs/super.c |  2 ++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8f36080b47c4..b1cac6d85bcb 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1605,8 +1605,10 @@ static int f2fs_write_cache_pages(struct address_space 
*mapping,
last_idx = page->index;
}

-   if (--wbc->nr_to_write <= 0 &&
-   wbc->sync_mode == WB_SYNC_NONE) {
+   /* give a priority to WB_SYNC threads */
+   if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
+   --wbc->nr_to_write <= 0) &&
+   wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
@@ -1662,9 +1664,18 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,

trace_f2fs_writepages(mapping->host, wbc, DATA);

+   /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
+   if (wbc->sync_mode == WB_SYNC_ALL)
+   atomic_inc(&sbi->wb_sync_req);
+   else if (atomic_read(&sbi->wb_sync_req))
+   goto skip_write;
+
blk_start_plug(&plug);
ret = f2fs_write_cache_pages(mapping, wbc);
blk_finish_plug(&plug);
+
+   if (wbc->sync_mode == WB_SYNC_ALL)
+   atomic_dec(&sbi->wb_sync_req);
/*
 * if some pages were truncated, we cannot guarantee its mapping->host
 * to detect pending bios.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 32d6f674c114..fd39db681226 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -888,6 +888,9 @@ struct f2fs_sb_info {
/* # of allocated blocks */
struct percpu_counter alloc_valid_block_count;

+   /* writeback control */
+   atomic_t wb_sync_req;   /* count # of WB_SYNC threads */
+
/* valid inode count */
struct percpu_counter total_valid_inode_count;

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2d78f3c76d18..cb65e6d0d275 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1566,6 +1566,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);

+   atomic_set(&sbi->wb_sync_req, 0);
+
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);

  1   2   3   4   5   6   7   8   9   10   >