[PATCH 3.2 65/94] IB/mlx4: Fix the SQ size of an RC QP

2016-08-13 Thread Ben Hutchings
3.2.82-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Yishai Hadas 

commit f2940e2c76bb554a7fbdd28ca5b90904117a9e96 upstream.

When calculating the required size of an RC QP send queue, leave
enough space for masked atomic operations, which require more space than
"regular" atomic operation.

Fixes: 6fa8f719844b ("IB/mlx4: Add support for masked atomic operations")
Signed-off-by: Yishai Hadas 
Reviewed-by: Jack Morgenstein 
Reviewed-by: Eran Ben Elisha 
Signed-off-by: Leon Romanovsky 
Signed-off-by: Doug Ledford 
Signed-off-by: Ben Hutchings 
---
 drivers/infiniband/hw/mlx4/qp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -283,7 +283,7 @@ static int send_wqe_overhead(enum ib_qp_
sizeof (struct mlx4_wqe_raddr_seg);
case IB_QPT_RC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
-   sizeof (struct mlx4_wqe_atomic_seg) +
+   sizeof (struct mlx4_wqe_masked_atomic_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
case IB_QPT_SMI:
case IB_QPT_GSI:



[PATCH 3.16 173/305] USB: quirks: Fix entries on wrong list in 3.16.y

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Ben Hutchings 

Commits ddbe1fca0bcb ("USB: Add device quirk for ASUS T100 Base
Station keyboard") and e5dff0e80463 ("USB: Add OTG PET device to TPL")
were wrongly backported to 3.16.y.  The original commits added to
usb_quirk_list but the backported versions added to
usb_interface_quirk_list.

Signed-off-by: Ben Hutchings 
---
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -188,6 +188,14 @@ static const struct usb_device_id usb_qu
/* USB3503 */
{ USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
 
+   /* ASUS Base Station(T100) */
+   { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
+   USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+
+   /* Protocol and OTG Electrical Test Device */
+   { USB_DEVICE(0x1a0a, 0x0200), .driver_info =
+   USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
+
/* Blackmagic Design Intensity Shuttle */
{ USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM },
 
@@ -202,14 +210,6 @@ static const struct usb_device_id usb_in
{ USB_VENDOR_AND_INTERFACE_INFO(0x046d, USB_CLASS_VIDEO, 1, 0),
  .driver_info = USB_QUIRK_RESET_RESUME },
 
-   /* ASUS Base Station(T100) */
-   { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
-   USB_QUIRK_IGNORE_REMOTE_WAKEUP },
-
-   /* Protocol and OTG Electrical Test Device */
-   { USB_DEVICE(0x1a0a, 0x0200), .driver_info =
-   USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
-
{ }  /* terminating entry must be last */
 };
 



[PATCH 3.2 65/94] IB/mlx4: Fix the SQ size of an RC QP

2016-08-13 Thread Ben Hutchings
3.2.82-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Yishai Hadas 

commit f2940e2c76bb554a7fbdd28ca5b90904117a9e96 upstream.

When calculating the required size of an RC QP send queue, leave
enough space for masked atomic operations, which require more space than
"regular" atomic operation.

Fixes: 6fa8f719844b ("IB/mlx4: Add support for masked atomic operations")
Signed-off-by: Yishai Hadas 
Reviewed-by: Jack Morgenstein 
Reviewed-by: Eran Ben Elisha 
Signed-off-by: Leon Romanovsky 
Signed-off-by: Doug Ledford 
Signed-off-by: Ben Hutchings 
---
 drivers/infiniband/hw/mlx4/qp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -283,7 +283,7 @@ static int send_wqe_overhead(enum ib_qp_
sizeof (struct mlx4_wqe_raddr_seg);
case IB_QPT_RC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
-   sizeof (struct mlx4_wqe_atomic_seg) +
+   sizeof (struct mlx4_wqe_masked_atomic_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
case IB_QPT_SMI:
case IB_QPT_GSI:



[PATCH 3.16 173/305] USB: quirks: Fix entries on wrong list in 3.16.y

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Ben Hutchings 

Commits ddbe1fca0bcb ("USB: Add device quirk for ASUS T100 Base
Station keyboard") and e5dff0e80463 ("USB: Add OTG PET device to TPL")
were wrongly backported to 3.16.y.  The original commits added to
usb_quirk_list but the backported versions added to
usb_interface_quirk_list.

Signed-off-by: Ben Hutchings 
---
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -188,6 +188,14 @@ static const struct usb_device_id usb_qu
/* USB3503 */
{ USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
 
+   /* ASUS Base Station(T100) */
+   { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
+   USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+
+   /* Protocol and OTG Electrical Test Device */
+   { USB_DEVICE(0x1a0a, 0x0200), .driver_info =
+   USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
+
/* Blackmagic Design Intensity Shuttle */
{ USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM },
 
@@ -202,14 +210,6 @@ static const struct usb_device_id usb_in
{ USB_VENDOR_AND_INTERFACE_INFO(0x046d, USB_CLASS_VIDEO, 1, 0),
  .driver_info = USB_QUIRK_RESET_RESUME },
 
-   /* ASUS Base Station(T100) */
-   { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
-   USB_QUIRK_IGNORE_REMOTE_WAKEUP },
-
-   /* Protocol and OTG Electrical Test Device */
-   { USB_DEVICE(0x1a0a, 0x0200), .driver_info =
-   USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
-
{ }  /* terminating entry must be last */
 };
 



[PATCH 3.16 078/305] ring-buffer: Prevent overflow of size in ring_buffer_resize()

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: "Steven Rostedt (Red Hat)" 

commit 59643d1535eb220668692a5359de22545af579f6 upstream.

If the size passed to ring_buffer_resize() is greater than
MAX_LONG - BUF_PAGE_SIZE then the DIV_ROUND_UP() will return zero.

Here's the details:

  # echo 18014398509481980 > /sys/kernel/debug/tracing/buffer_size_kb

tracing_entries_write() processes this and converts kb to bytes.

 18014398509481980 << 10 = 18446744073709547520

and this is passed to ring_buffer_resize() as unsigned long size.

 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);

Where DIV_ROUND_UP(a, b) is (a + b - 1)/b

BUF_PAGE_SIZE is 4080 and here

 18446744073709547520 + 4080 - 1 = 18446744073709551599

where 18446744073709551599 is still smaller than 2^64

 2^64 - 18446744073709551599 = 17

But now 18446744073709551599 / 4080 = 4521260802379792

and size = size * 4080 = 18446744073709551360

This is checked to make sure its still greater than 2 * 4080,
which it is.

Then we convert to the number of buffer pages needed.

 nr_page = DIV_ROUND_UP(size, BUF_PAGE_SIZE)

but this time size is 18446744073709551360 and

 2^64 - (18446744073709551360 + 4080 - 1) = -3823

Thus it overflows and the resulting number is less than 4080, which makes

  3823 / 4080 = 0

and nr_pages is set to this. As we already checked against the minimum that
nr_pages may be, this causes the logic to fail as well, and we crash the
kernel.

There's no reason to have the two DIV_ROUND_UP() (that's just result of
historical code changes), clean up the code and fix this bug.

Fixes: 83f40318dab00 ("ring-buffer: Make removal of ring buffer pages atomic")
Signed-off-by: Steven Rostedt 
Signed-off-by: Ben Hutchings 
---
 kernel/trace/ring_buffer.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1694,14 +1694,13 @@ int ring_buffer_resize(struct ring_buffe
!cpumask_test_cpu(cpu_id, buffer->cpumask))
return size;
 
-   size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
-   size *= BUF_PAGE_SIZE;
+   nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 
/* we need a minimum of two pages */
-   if (size < BUF_PAGE_SIZE * 2)
-   size = BUF_PAGE_SIZE * 2;
+   if (nr_pages < 2)
+   nr_pages = 2;
 
-   nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+   size = nr_pages * BUF_PAGE_SIZE;
 
/*
 * Don't succeed if resizing is disabled, as a reader might be



[PATCH 3.16 277/305] ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Kangjie Lu 

commit cec8f96e49d9be372fdb0c3836dcf31ec71e457e upstream.

The stack object “tread” has a total size of 32 bytes. Its field
“event” and “val” both contain 4 bytes padding. These 8 bytes
padding bytes are sent to user without being initialized.

Signed-off-by: Kangjie Lu 
Signed-off-by: Takashi Iwai 
Signed-off-by: Ben Hutchings 
---
 sound/core/timer.c | 1 +
 1 file changed, 1 insertion(+)

--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -1750,6 +1750,7 @@ static int snd_timer_user_params(struct
if (tu->timeri->flags & SNDRV_TIMER_IFLG_EARLY_EVENT) {
if (tu->tread) {
struct snd_timer_tread tread;
+   memset(&tread, 0, sizeof(tread));
tread.event = SNDRV_TIMER_EVENT_EARLY;
tread.tstamp.tv_sec = 0;
tread.tstamp.tv_nsec = 0;



[PATCH 3.16 053/305] MIPS: Don't unwind to user mode with EVA

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: James Hogan 

commit a816b306c62195b7c43c92cb13330821a96bdc27 upstream.

When unwinding through IRQs and exceptions, the unwinding only continues
if the PC is a kernel text address, however since EVA it is possible for
user and kernel address ranges to overlap, potentially allowing
unwinding to continue to user mode if the user PC happens to be in the
kernel text address range.

Adjust the check to also ensure that the register state from before the
exception is actually running in kernel mode, i.e. !user_mode(regs).

I don't believe any harm can come of this problem, since the PC is only
output, the stack pointer is checked to ensure it resides within the
task's stack page before it is dereferenced in search of the return
address, and the return address register is similarly only output (if
the PC is in a leaf function or the beginning of a non-leaf function).

However unwind_stack() is only meant for unwinding kernel code, so to be
correct the unwind should stop there.

Signed-off-by: James Hogan 
Reviewed-by: Leonid Yegoshin 
Cc: linux-m...@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/11700/
Signed-off-by: Ralf Baechle 
Signed-off-by: Ben Hutchings 
---
 arch/mips/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -489,7 +489,7 @@ unsigned long notrace unwind_stack_by_ad
*sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) {
regs = (struct pt_regs *)*sp;
pc = regs->cp0_epc;
-   if (__kernel_text_address(pc)) {
+   if (!user_mode(regs) && __kernel_text_address(pc)) {
*sp = regs->regs[29];
*ra = regs->regs[31];
return pc;



[PATCH 3.16 078/305] ring-buffer: Prevent overflow of size in ring_buffer_resize()

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: "Steven Rostedt (Red Hat)" 

commit 59643d1535eb220668692a5359de22545af579f6 upstream.

If the size passed to ring_buffer_resize() is greater than
MAX_LONG - BUF_PAGE_SIZE then the DIV_ROUND_UP() will return zero.

Here's the details:

  # echo 18014398509481980 > /sys/kernel/debug/tracing/buffer_size_kb

tracing_entries_write() processes this and converts kb to bytes.

 18014398509481980 << 10 = 18446744073709547520

and this is passed to ring_buffer_resize() as unsigned long size.

 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);

Where DIV_ROUND_UP(a, b) is (a + b - 1)/b

BUF_PAGE_SIZE is 4080 and here

 18446744073709547520 + 4080 - 1 = 18446744073709551599

where 18446744073709551599 is still smaller than 2^64

 2^64 - 18446744073709551599 = 17

But now 18446744073709551599 / 4080 = 4521260802379792

and size = size * 4080 = 18446744073709551360

This is checked to make sure its still greater than 2 * 4080,
which it is.

Then we convert to the number of buffer pages needed.

 nr_page = DIV_ROUND_UP(size, BUF_PAGE_SIZE)

but this time size is 18446744073709551360 and

 2^64 - (18446744073709551360 + 4080 - 1) = -3823

Thus it overflows and the resulting number is less than 4080, which makes

  3823 / 4080 = 0

and nr_pages is set to this. As we already checked against the minimum that
nr_pages may be, this causes the logic to fail as well, and we crash the
kernel.

There's no reason to have the two DIV_ROUND_UP() (that's just result of
historical code changes), clean up the code and fix this bug.

Fixes: 83f40318dab00 ("ring-buffer: Make removal of ring buffer pages atomic")
Signed-off-by: Steven Rostedt 
Signed-off-by: Ben Hutchings 
---
 kernel/trace/ring_buffer.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1694,14 +1694,13 @@ int ring_buffer_resize(struct ring_buffe
!cpumask_test_cpu(cpu_id, buffer->cpumask))
return size;
 
-   size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
-   size *= BUF_PAGE_SIZE;
+   nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 
/* we need a minimum of two pages */
-   if (size < BUF_PAGE_SIZE * 2)
-   size = BUF_PAGE_SIZE * 2;
+   if (nr_pages < 2)
+   nr_pages = 2;
 
-   nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+   size = nr_pages * BUF_PAGE_SIZE;
 
/*
 * Don't succeed if resizing is disabled, as a reader might be



[PATCH 3.16 277/305] ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Kangjie Lu 

commit cec8f96e49d9be372fdb0c3836dcf31ec71e457e upstream.

The stack object “tread” has a total size of 32 bytes. Its field
“event” and “val” both contain 4 bytes padding. These 8 bytes
padding bytes are sent to user without being initialized.

Signed-off-by: Kangjie Lu 
Signed-off-by: Takashi Iwai 
Signed-off-by: Ben Hutchings 
---
 sound/core/timer.c | 1 +
 1 file changed, 1 insertion(+)

--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -1750,6 +1750,7 @@ static int snd_timer_user_params(struct
if (tu->timeri->flags & SNDRV_TIMER_IFLG_EARLY_EVENT) {
if (tu->tread) {
struct snd_timer_tread tread;
+   memset(&tread, 0, sizeof(tread));
tread.event = SNDRV_TIMER_EVENT_EARLY;
tread.tstamp.tv_sec = 0;
tread.tstamp.tv_nsec = 0;



[PATCH 3.16 053/305] MIPS: Don't unwind to user mode with EVA

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: James Hogan 

commit a816b306c62195b7c43c92cb13330821a96bdc27 upstream.

When unwinding through IRQs and exceptions, the unwinding only continues
if the PC is a kernel text address, however since EVA it is possible for
user and kernel address ranges to overlap, potentially allowing
unwinding to continue to user mode if the user PC happens to be in the
kernel text address range.

Adjust the check to also ensure that the register state from before the
exception is actually running in kernel mode, i.e. !user_mode(regs).

I don't believe any harm can come of this problem, since the PC is only
output, the stack pointer is checked to ensure it resides within the
task's stack page before it is dereferenced in search of the return
address, and the return address register is similarly only output (if
the PC is in a leaf function or the beginning of a non-leaf function).

However unwind_stack() is only meant for unwinding kernel code, so to be
correct the unwind should stop there.

Signed-off-by: James Hogan 
Reviewed-by: Leonid Yegoshin 
Cc: linux-m...@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/11700/
Signed-off-by: Ralf Baechle 
Signed-off-by: Ben Hutchings 
---
 arch/mips/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -489,7 +489,7 @@ unsigned long notrace unwind_stack_by_ad
*sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) {
regs = (struct pt_regs *)*sp;
pc = regs->cp0_epc;
-   if (__kernel_text_address(pc)) {
+   if (!user_mode(regs) && __kernel_text_address(pc)) {
*sp = regs->regs[29];
*ra = regs->regs[31];
return pc;



[PATCH 3.2 29/94] sunrpc: Update RPCBIND_MAXNETIDLEN

2016-08-13 Thread Ben Hutchings
3.2.82-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Chuck Lever 

commit 4b9c7f9db9a003f5c342184dc4401c1b7f2efb39 upstream.

Commit 176e21ee2ec8 ("SUNRPC: Support for RPC over AF_LOCAL
transports") added a 5-character netid, but did not bump
RPCBIND_MAXNETIDLEN from 4 to 5.

Fixes: 176e21ee2ec8 ("SUNRPC: Support for RPC over AF_LOCAL ...")
Signed-off-by: Chuck Lever 
Signed-off-by: Anna Schumaker 
Signed-off-by: Ben Hutchings 
---
 include/linux/sunrpc/msg_prot.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -149,9 +149,9 @@ typedef __be32  rpc_fraghdr;
 
 /*
  * Note that RFC 1833 does not put any size restrictions on the
- * netid string, but all currently defined netid's fit in 4 bytes.
+ * netid string, but all currently defined netid's fit in 5 bytes.
  */
-#define RPCBIND_MAXNETIDLEN (4u)
+#define RPCBIND_MAXNETIDLEN (5u)
 
 /*
  * Universal addresses are introduced in RFC 1833 and further spelled



[PATCH 3.16 281/305] rds: fix an infoleak in rds_inc_info_copy

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Kangjie Lu 

commit 4116def2337991b39919f3b448326e21c40e0dbb upstream.

The last field "flags" of object "minfo" is not initialized.
Copying this object out may leak kernel stack data.
Assign 0 to it to avoid leak.

Signed-off-by: Kangjie Lu 
Acked-by: Santosh Shilimkar 
Signed-off-by: David S. Miller 
Signed-off-by: Ben Hutchings 
---
 net/rds/recv.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -543,5 +543,7 @@ void rds_inc_info_copy(struct rds_incomi
minfo.fport = inc->i_hdr.h_dport;
}
 
+   minfo.flags = 0;
+
rds_info_copy(iter, &minfo, sizeof(minfo));
 }



[PATCH 3.2 29/94] sunrpc: Update RPCBIND_MAXNETIDLEN

2016-08-13 Thread Ben Hutchings
3.2.82-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Chuck Lever 

commit 4b9c7f9db9a003f5c342184dc4401c1b7f2efb39 upstream.

Commit 176e21ee2ec8 ("SUNRPC: Support for RPC over AF_LOCAL
transports") added a 5-character netid, but did not bump
RPCBIND_MAXNETIDLEN from 4 to 5.

Fixes: 176e21ee2ec8 ("SUNRPC: Support for RPC over AF_LOCAL ...")
Signed-off-by: Chuck Lever 
Signed-off-by: Anna Schumaker 
Signed-off-by: Ben Hutchings 
---
 include/linux/sunrpc/msg_prot.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -149,9 +149,9 @@ typedef __be32  rpc_fraghdr;
 
 /*
  * Note that RFC 1833 does not put any size restrictions on the
- * netid string, but all currently defined netid's fit in 4 bytes.
+ * netid string, but all currently defined netid's fit in 5 bytes.
  */
-#define RPCBIND_MAXNETIDLEN (4u)
+#define RPCBIND_MAXNETIDLEN (5u)
 
 /*
  * Universal addresses are introduced in RFC 1833 and further spelled



[PATCH 3.16 281/305] rds: fix an infoleak in rds_inc_info_copy

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Kangjie Lu 

commit 4116def2337991b39919f3b448326e21c40e0dbb upstream.

The last field "flags" of object "minfo" is not initialized.
Copying this object out may leak kernel stack data.
Assign 0 to it to avoid leak.

Signed-off-by: Kangjie Lu 
Acked-by: Santosh Shilimkar 
Signed-off-by: David S. Miller 
Signed-off-by: Ben Hutchings 
---
 net/rds/recv.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -543,5 +543,7 @@ void rds_inc_info_copy(struct rds_incomi
minfo.fport = inc->i_hdr.h_dport;
}
 
+   minfo.flags = 0;
+
rds_info_copy(iter, &minfo, sizeof(minfo));
 }



[PATCH 3.16 005/305] ath5k: Change led pin configuration for compaq c700 laptop

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Joseph Salisbury 

commit 7b9bc799a445aea95f64f15e0083cb19b5789abe upstream.

BugLink: http://bugs.launchpad.net/bugs/972604

Commit 09c9bae26b0d3c9472cb6ae45010460a2cee8b8d ("ath5k: add led pin
configuration for compaq c700 laptop") added a pin configuration for the Compaq
c700 laptop.  However, the polarity of the led pin is reversed.  It should be
red for wifi off and blue for wifi on, but it is the opposite.  This bug was
reported in the following bug report:
http://pad.lv/972604

Fixes: 09c9bae26b0d3c9472cb6ae45010460a2cee8b8d ("ath5k: add led pin configuration for compaq c700 laptop")
Signed-off-by: Joseph Salisbury 
Signed-off-by: Kalle Valo 
Signed-off-by: Ben Hutchings 
---
 drivers/net/wireless/ath/ath5k/led.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/net/wireless/ath/ath5k/led.c
+++ b/drivers/net/wireless/ath/ath5k/led.c
@@ -77,7 +77,7 @@ static DEFINE_PCI_DEVICE_TABLE(ath5k_led
/* HP Compaq CQ60-206US (ddregg...@jumptv.com) */
{ ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137a), ATH_LED(3, 1) },
/* HP Compaq C700 (nitrous...@gmail.com) */
-   { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 1) },
+   { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 0) },
/* LiteOn AR5BXB63 (mag...@salug.it) */
{ ATH_SDEVICE(PCI_VENDOR_ID_ATHEROS, 0x3067), ATH_LED(3, 0) },
/* IBM-specific AR5212 (all others) */



[PATCH 3.16 201/305] hwmon: (dell-smm) Restrict fan control and serial number to CAP_SYS_ADMIN by default

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Pali Rohár 

commit 7613663cc186f8f3c50279390ddc60286758001c upstream.

For security reasons ordinary user must not be able to control fan speed
via /proc/i8k by default. Some malicious software running under "nobody"
user could be able to turn fan off and cause HW problems. So this patch
changes default value of "restricted" parameter to 1.

Also restrict reading of DMI_PRODUCT_SERIAL from /proc/i8k via "restricted"
parameter. It is because non root user cannot read DMI_PRODUCT_SERIAL from
sysfs file /sys/class/dmi/id/product_serial.

Old non secure behaviour of file /proc/i8k can be achieved by loading this
module with "restricted" parameter set to 0.

Note that this patch has effects only for kernels compiled with CONFIG_I8K
and only for file /proc/i8k. Hwmon interface provided by this driver was
not changed and root access for setting fan speed was needed also before.

Reported-by: Mario Limonciello 
Signed-off-by: Pali Rohár 
Signed-off-by: Guenter Roeck 
[bwh: Backported to 3.16: adjust filename, context]
Signed-off-by: Ben Hutchings 
---
 drivers/char/i8k.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -62,6 +62,7 @@
 
 static DEFINE_MUTEX(i8k_mutex);
 static char bios_version[4];
+static char bios_machineid[16];
 static struct device *i8k_hwmon_dev;
 static u32 i8k_hwmon_flags;
 static int i8k_fan_mult;
@@ -85,13 +86,13 @@ static bool ignore_dmi;
 module_param(ignore_dmi, bool, 0);
 MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match");
 
-static bool restricted;
+static bool restricted = true;
 module_param(restricted, bool, 0);
-MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set");
+MODULE_PARM_DESC(restricted, "Restrict fan control and serial number to CAP_SYS_ADMIN (default: 1)");
 
 static bool power_status;
 module_param(power_status, bool, 0600);
-MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k");
+MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k (default: 0)");
 
 static int fan_mult = I8K_FAN_MULT;
 module_param(fan_mult, int, 0);
@@ -350,9 +351,11 @@ i8k_ioctl_unlocked(struct file *fp, unsi
break;
 
case I8K_MACHINE_ID:
-   memset(buff, 0, 16);
-   strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
-   sizeof(buff));
+   if (restricted && !capable(CAP_SYS_ADMIN))
+   return -EPERM;
+
+   memset(buff, 0, sizeof(buff));
+   strlcpy(buff, bios_machineid, sizeof(buff));
break;
 
case I8K_FN_STATUS:
@@ -469,7 +472,7 @@ static int i8k_proc_show(struct seq_file
return seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n",
  I8K_PROC_FMT,
  bios_version,
- i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+ (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : bios_machineid,
  cpu_temp,
  left_fan, right_fan, left_speed, right_speed,
  ac_power, fn_key);
@@ -765,6 +768,8 @@ static int __init i8k_probe(void)
 
strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION),
sizeof(bios_version));
+   strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+   sizeof(bios_machineid));
 
/*
 * Get SMM Dell signature



[PATCH 3.16 224/305] ALSA: dummy: Fix a use-after-free at closing

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Takashi Iwai 

commit d5dbbe6569481bf12dcbe3e12cff72c5f78d272c upstream.

syzkaller fuzzer spotted a potential use-after-free case in snd-dummy
driver when hrtimer is used as backend:
> ==
> BUG: KASAN: use-after-free in rb_erase+0x1b17/0x2010 at addr 88005e5b6f68
>  Read of size 8 by task syz-executor/8984
> =
> BUG kmalloc-192 (Not tainted): kasan: bad access detected
> -
>
> Disabling lock debugging due to kernel taint
> INFO: Allocated in 0x age=18446705582212484632
> 
> [<  none  >] dummy_hrtimer_create+0x49/0x1a0 sound/drivers/dummy.c:464
> 
> INFO: Freed in 0xfffd8e09 age=18446705496313138713 cpu=2164287125 pid=-1
> [<  none  >] dummy_hrtimer_free+0x68/0x80 sound/drivers/dummy.c:481
> 
> Call Trace:
>  [] __asan_report_load8_noabort+0x3e/0x40 
> mm/kasan/report.c:333
>  [< inline >] rb_set_parent include/linux/rbtree_augmented.h:111
>  [< inline >] __rb_erase_augmented 
> include/linux/rbtree_augmented.h:218
>  [] rb_erase+0x1b17/0x2010 lib/rbtree.c:427
>  [] timerqueue_del+0x78/0x170 lib/timerqueue.c:86
>  [] __remove_hrtimer+0x90/0x220 kernel/time/hrtimer.c:903
>  [< inline >] remove_hrtimer kernel/time/hrtimer.c:945
>  [] hrtimer_try_to_cancel+0x22a/0x570 
> kernel/time/hrtimer.c:1046
>  [] hrtimer_cancel+0x22/0x40 kernel/time/hrtimer.c:1066
>  [] dummy_hrtimer_stop+0x91/0xb0 sound/drivers/dummy.c:417
>  [] dummy_pcm_trigger+0x17f/0x1e0 sound/drivers/dummy.c:507
>  [] snd_pcm_do_stop+0x160/0x1b0 sound/core/pcm_native.c:1106
>  [] snd_pcm_action_single+0x76/0x120 
> sound/core/pcm_native.c:956
>  [] snd_pcm_action+0x231/0x290 sound/core/pcm_native.c:974
>  [< inline >] snd_pcm_stop sound/core/pcm_native.c:1139
>  [] snd_pcm_drop+0x12d/0x1d0 sound/core/pcm_native.c:1784
>  [] snd_pcm_common_ioctl1+0xfae/0x2150 
> sound/core/pcm_native.c:2805
>  [] snd_pcm_capture_ioctl1+0x2a1/0x5e0 
> sound/core/pcm_native.c:2976
>  [] snd_pcm_kernel_ioctl+0x11c/0x160 
> sound/core/pcm_native.c:3020
>  [] snd_pcm_oss_sync+0x3a4/0xa30 
> sound/core/oss/pcm_oss.c:1693
>  [] snd_pcm_oss_release+0x1ad/0x280 
> sound/core/oss/pcm_oss.c:2483
>  .

A workaround is to call hrtimer_cancel() in dummy_hrtimer_sync() which
is called certainly before other blocking ops.

Reported-by: Dmitry Vyukov 
Tested-by: Dmitry Vyukov 
Signed-off-by: Takashi Iwai 
Signed-off-by: Ben Hutchings 
---
 sound/drivers/dummy.c | 1 +
 1 file changed, 1 insertion(+)

--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -422,6 +422,7 @@ static int dummy_hrtimer_stop(struct snd
 
 static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm)
 {
+   hrtimer_cancel(&dpcm->timer);
tasklet_kill(&dpcm->tasklet);
 }
 



[PATCH 3.16 005/305] ath5k: Change led pin configuration for compaq c700 laptop

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Joseph Salisbury 

commit 7b9bc799a445aea95f64f15e0083cb19b5789abe upstream.

BugLink: http://bugs.launchpad.net/bugs/972604

Commit 09c9bae26b0d3c9472cb6ae45010460a2cee8b8d ("ath5k: add led pin
configuration for compaq c700 laptop") added a pin configuration for the Compaq
c700 laptop.  However, the polarity of the led pin is reversed.  It should be
red for wifi off and blue for wifi on, but it is the opposite.  This bug was
reported in the following bug report:
http://pad.lv/972604

Fixes: 09c9bae26b0d3c9472cb6ae45010460a2cee8b8d ("ath5k: add led pin configuration for compaq c700 laptop")
Signed-off-by: Joseph Salisbury 
Signed-off-by: Kalle Valo 
Signed-off-by: Ben Hutchings 
---
 drivers/net/wireless/ath/ath5k/led.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/net/wireless/ath/ath5k/led.c
+++ b/drivers/net/wireless/ath/ath5k/led.c
@@ -77,7 +77,7 @@ static DEFINE_PCI_DEVICE_TABLE(ath5k_led
/* HP Compaq CQ60-206US (ddregg...@jumptv.com) */
{ ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137a), ATH_LED(3, 1) },
/* HP Compaq C700 (nitrous...@gmail.com) */
-   { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 1) },
+   { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 0) },
/* LiteOn AR5BXB63 (mag...@salug.it) */
{ ATH_SDEVICE(PCI_VENDOR_ID_ATHEROS, 0x3067), ATH_LED(3, 0) },
/* IBM-specific AR5212 (all others) */



[PATCH 3.16 201/305] hwmon: (dell-smm) Restrict fan control and serial number to CAP_SYS_ADMIN by default

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Pali Rohár 

commit 7613663cc186f8f3c50279390ddc60286758001c upstream.

For security reasons ordinary user must not be able to control fan speed
via /proc/i8k by default. Some malicious software running under "nobody"
user could be able to turn fan off and cause HW problems. So this patch
changes default value of "restricted" parameter to 1.

Also restrict reading of DMI_PRODUCT_SERIAL from /proc/i8k via "restricted"
parameter. It is because non root user cannot read DMI_PRODUCT_SERIAL from
sysfs file /sys/class/dmi/id/product_serial.

Old non secure behaviour of file /proc/i8k can be achieved by loading this
module with "restricted" parameter set to 0.

Note that this patch has effects only for kernels compiled with CONFIG_I8K
and only for file /proc/i8k. Hwmon interface provided by this driver was
not changed and root access for setting fan speed was needed also before.

Reported-by: Mario Limonciello 
Signed-off-by: Pali Rohár 
Signed-off-by: Guenter Roeck 
[bwh: Backported to 3.16: adjust filename, context]
Signed-off-by: Ben Hutchings 
---
 drivers/char/i8k.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -62,6 +62,7 @@
 
 static DEFINE_MUTEX(i8k_mutex);
 static char bios_version[4];
+static char bios_machineid[16];
 static struct device *i8k_hwmon_dev;
 static u32 i8k_hwmon_flags;
 static int i8k_fan_mult;
@@ -85,13 +86,13 @@ static bool ignore_dmi;
 module_param(ignore_dmi, bool, 0);
 MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match");
 
-static bool restricted;
+static bool restricted = true;
 module_param(restricted, bool, 0);
-MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set");
+MODULE_PARM_DESC(restricted, "Restrict fan control and serial number to CAP_SYS_ADMIN (default: 1)");
 
 static bool power_status;
 module_param(power_status, bool, 0600);
-MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k");
+MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k (default: 0)");
 
 static int fan_mult = I8K_FAN_MULT;
 module_param(fan_mult, int, 0);
@@ -350,9 +351,11 @@ i8k_ioctl_unlocked(struct file *fp, unsi
break;
 
case I8K_MACHINE_ID:
-   memset(buff, 0, 16);
-   strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
-   sizeof(buff));
+   if (restricted && !capable(CAP_SYS_ADMIN))
+   return -EPERM;
+
+   memset(buff, 0, sizeof(buff));
+   strlcpy(buff, bios_machineid, sizeof(buff));
break;
 
case I8K_FN_STATUS:
@@ -469,7 +472,7 @@ static int i8k_proc_show(struct seq_file
return seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n",
  I8K_PROC_FMT,
  bios_version,
- i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+ (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : 
bios_machineid,
  cpu_temp,
  left_fan, right_fan, left_speed, right_speed,
  ac_power, fn_key);
@@ -765,6 +768,8 @@ static int __init i8k_probe(void)
 
strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION),
sizeof(bios_version));
+   strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+   sizeof(bios_machineid));
 
/*
 * Get SMM Dell signature



[PATCH 3.16 224/305] ALSA: dummy: Fix a use-after-free at closing

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Takashi Iwai 

commit d5dbbe6569481bf12dcbe3e12cff72c5f78d272c upstream.

syzkaller fuzzer spotted a potential use-after-free case in snd-dummy
driver when hrtimer is used as backend:
> ==
> BUG: KASAN: use-after-free in rb_erase+0x1b17/0x2010 at addr 88005e5b6f68
>  Read of size 8 by task syz-executor/8984
> =
> BUG kmalloc-192 (Not tainted): kasan: bad access detected
> -
>
> Disabling lock debugging due to kernel taint
> INFO: Allocated in 0x age=18446705582212484632
> 
> [<  none  >] dummy_hrtimer_create+0x49/0x1a0 sound/drivers/dummy.c:464
> 
> INFO: Freed in 0xfffd8e09 age=18446705496313138713 cpu=2164287125 pid=-1
> [<  none  >] dummy_hrtimer_free+0x68/0x80 sound/drivers/dummy.c:481
> 
> Call Trace:
>  [] __asan_report_load8_noabort+0x3e/0x40 
> mm/kasan/report.c:333
>  [< inline >] rb_set_parent include/linux/rbtree_augmented.h:111
>  [< inline >] __rb_erase_augmented 
> include/linux/rbtree_augmented.h:218
>  [] rb_erase+0x1b17/0x2010 lib/rbtree.c:427
>  [] timerqueue_del+0x78/0x170 lib/timerqueue.c:86
>  [] __remove_hrtimer+0x90/0x220 kernel/time/hrtimer.c:903
>  [< inline >] remove_hrtimer kernel/time/hrtimer.c:945
>  [] hrtimer_try_to_cancel+0x22a/0x570 
> kernel/time/hrtimer.c:1046
>  [] hrtimer_cancel+0x22/0x40 kernel/time/hrtimer.c:1066
>  [] dummy_hrtimer_stop+0x91/0xb0 sound/drivers/dummy.c:417
>  [] dummy_pcm_trigger+0x17f/0x1e0 sound/drivers/dummy.c:507
>  [] snd_pcm_do_stop+0x160/0x1b0 sound/core/pcm_native.c:1106
>  [] snd_pcm_action_single+0x76/0x120 
> sound/core/pcm_native.c:956
>  [] snd_pcm_action+0x231/0x290 sound/core/pcm_native.c:974
>  [< inline >] snd_pcm_stop sound/core/pcm_native.c:1139
>  [] snd_pcm_drop+0x12d/0x1d0 sound/core/pcm_native.c:1784
>  [] snd_pcm_common_ioctl1+0xfae/0x2150 
> sound/core/pcm_native.c:2805
>  [] snd_pcm_capture_ioctl1+0x2a1/0x5e0 
> sound/core/pcm_native.c:2976
>  [] snd_pcm_kernel_ioctl+0x11c/0x160 
> sound/core/pcm_native.c:3020
>  [] snd_pcm_oss_sync+0x3a4/0xa30 
> sound/core/oss/pcm_oss.c:1693
>  [] snd_pcm_oss_release+0x1ad/0x280 
> sound/core/oss/pcm_oss.c:2483
>  .

A workaround is to call hrtimer_cancel() in dummy_hrtimer_sync() which
is called certainly before other blocking ops.

Reported-by: Dmitry Vyukov 
Tested-by: Dmitry Vyukov 
Signed-off-by: Takashi Iwai 
Signed-off-by: Ben Hutchings 
---
 sound/drivers/dummy.c | 1 +
 1 file changed, 1 insertion(+)

--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -422,6 +422,7 @@ static int dummy_hrtimer_stop(struct snd
 
 static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm)
 {
+   hrtimer_cancel(&dpcm->timer);
tasklet_kill(&dpcm->tasklet);
 }
 



[PATCH 3.2 05/94] alpha/PCI: Call iomem_is_exclusive() for IORESOURCE_MEM, but not IORESOURCE_IO

2016-08-13 Thread Ben Hutchings
3.2.82-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Bjorn Helgaas 

commit c20e128030caf0537d5e906753eac1c28fefdb75 upstream.

The alpha pci_mmap_resource() is used for both IORESOURCE_MEM and
IORESOURCE_IO resources, but iomem_is_exclusive() is only applicable for
IORESOURCE_MEM.

Call iomem_is_exclusive() only for IORESOURCE_MEM resources, and do it
earlier to match the generic version of pci_mmap_resource().

Fixes: 10a0ef39fbd1 ("PCI/alpha: pci sysfs resources")
Signed-off-by: Bjorn Helgaas 
CC: Ivan Kokshaysky 
Signed-off-by: Ben Hutchings 
---
 arch/alpha/kernel/pci-sysfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -78,10 +78,10 @@ static int pci_mmap_resource(struct kobj
if (i >= PCI_ROM_RESOURCE)
return -ENODEV;
 
-   if (!__pci_mmap_fits(pdev, i, vma, sparse))
+   if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start))
return -EINVAL;
 
-   if (iomem_is_exclusive(res->start))
+   if (!__pci_mmap_fits(pdev, i, vma, sparse))
return -EINVAL;
 
pcibios_resource_to_bus(pdev, &bar, res);



[PATCH 3.16 270/305] xen/acpi: allow xen-acpi-processor driver to load on Xen 4.7

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Jan Beulich 

commit 6f2d9d99213514360034c6d52d2c3919290b3504 upstream.

As of Xen 4.7 PV CPUID doesn't expose either of CPUID[1].ECX[7] and
CPUID[0x80000007].EDX[7] anymore, causing the driver to fail to load on
both Intel and AMD systems. Doing any kind of hardware capability
checks in the driver as a prerequisite was wrong anyway: With the
hypervisor being in charge, all such checking should be done by it. If
ACPI data gets uploaded despite some missing capability, the hypervisor
is free to ignore part or all of that data.

Ditch the entire check_prereq() function, and do the only valid check
(xen_initial_domain()) in the caller in its place.

Signed-off-by: Jan Beulich 
Signed-off-by: David Vrabel 
Signed-off-by: Ben Hutchings 
---
 drivers/xen/xen-acpi-processor.c | 35 +++
 1 file changed, 3 insertions(+), 32 deletions(-)

--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -423,36 +423,7 @@ upload:
 
return 0;
 }
-static int __init check_prereq(void)
-{
-   struct cpuinfo_x86 *c = &cpu_data(0);
-
-   if (!xen_initial_domain())
-   return -ENODEV;
-
-   if (!acpi_gbl_FADT.smi_command)
-   return -ENODEV;
 
-   if (c->x86_vendor == X86_VENDOR_INTEL) {
-   if (!cpu_has(c, X86_FEATURE_EST))
-   return -ENODEV;
-
-   return 0;
-   }
-   if (c->x86_vendor == X86_VENDOR_AMD) {
-   /* Copied from powernow-k8.h, can't include ../cpufreq/powernow
-* as we get compile warnings for the static functions.
-*/
-#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
-#define USE_HW_PSTATE   0x00000080
-   u32 eax, ebx, ecx, edx;
-   cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-   if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE)
-   return -ENODEV;
-   return 0;
-   }
-   return -ENODEV;
-}
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance __percpu *acpi_perf_data;
 
@@ -509,10 +480,10 @@ struct notifier_block xen_acpi_processor
 static int __init xen_acpi_processor_init(void)
 {
unsigned int i;
-   int rc = check_prereq();
+   int rc;
 
-   if (rc)
-   return rc;
+   if (!xen_initial_domain())
+   return -ENODEV;
 
nr_acpi_bits = get_max_acpi_id() + 1;
acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned 
long), GFP_KERNEL);



[PATCH 3.2 05/94] alpha/PCI: Call iomem_is_exclusive() for IORESOURCE_MEM, but not IORESOURCE_IO

2016-08-13 Thread Ben Hutchings
3.2.82-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Bjorn Helgaas 

commit c20e128030caf0537d5e906753eac1c28fefdb75 upstream.

The alpha pci_mmap_resource() is used for both IORESOURCE_MEM and
IORESOURCE_IO resources, but iomem_is_exclusive() is only applicable for
IORESOURCE_MEM.

Call iomem_is_exclusive() only for IORESOURCE_MEM resources, and do it
earlier to match the generic version of pci_mmap_resource().

Fixes: 10a0ef39fbd1 ("PCI/alpha: pci sysfs resources")
Signed-off-by: Bjorn Helgaas 
CC: Ivan Kokshaysky 
Signed-off-by: Ben Hutchings 
---
 arch/alpha/kernel/pci-sysfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -78,10 +78,10 @@ static int pci_mmap_resource(struct kobj
if (i >= PCI_ROM_RESOURCE)
return -ENODEV;
 
-   if (!__pci_mmap_fits(pdev, i, vma, sparse))
+   if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start))
return -EINVAL;
 
-   if (iomem_is_exclusive(res->start))
+   if (!__pci_mmap_fits(pdev, i, vma, sparse))
return -EINVAL;
 
pcibios_resource_to_bus(pdev, &bar, res);



[PATCH 3.16 270/305] xen/acpi: allow xen-acpi-processor driver to load on Xen 4.7

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Jan Beulich 

commit 6f2d9d99213514360034c6d52d2c3919290b3504 upstream.

As of Xen 4.7 PV CPUID doesn't expose either of CPUID[1].ECX[7] and
CPUID[0x80000007].EDX[7] anymore, causing the driver to fail to load on
both Intel and AMD systems. Doing any kind of hardware capability
checks in the driver as a prerequisite was wrong anyway: With the
hypervisor being in charge, all such checking should be done by it. If
ACPI data gets uploaded despite some missing capability, the hypervisor
is free to ignore part or all of that data.

Ditch the entire check_prereq() function, and do the only valid check
(xen_initial_domain()) in the caller in its place.

Signed-off-by: Jan Beulich 
Signed-off-by: David Vrabel 
Signed-off-by: Ben Hutchings 
---
 drivers/xen/xen-acpi-processor.c | 35 +++
 1 file changed, 3 insertions(+), 32 deletions(-)

--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -423,36 +423,7 @@ upload:
 
return 0;
 }
-static int __init check_prereq(void)
-{
-   struct cpuinfo_x86 *c = &cpu_data(0);
-
-   if (!xen_initial_domain())
-   return -ENODEV;
-
-   if (!acpi_gbl_FADT.smi_command)
-   return -ENODEV;
 
-   if (c->x86_vendor == X86_VENDOR_INTEL) {
-   if (!cpu_has(c, X86_FEATURE_EST))
-   return -ENODEV;
-
-   return 0;
-   }
-   if (c->x86_vendor == X86_VENDOR_AMD) {
-   /* Copied from powernow-k8.h, can't include ../cpufreq/powernow
-* as we get compile warnings for the static functions.
-*/
-#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
-#define USE_HW_PSTATE   0x00000080
-   u32 eax, ebx, ecx, edx;
-   cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-   if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE)
-   return -ENODEV;
-   return 0;
-   }
-   return -ENODEV;
-}
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance __percpu *acpi_perf_data;
 
@@ -509,10 +480,10 @@ struct notifier_block xen_acpi_processor
 static int __init xen_acpi_processor_init(void)
 {
unsigned int i;
-   int rc = check_prereq();
+   int rc;
 
-   if (rc)
-   return rc;
+   if (!xen_initial_domain())
+   return -ENODEV;
 
nr_acpi_bits = get_max_acpi_id() + 1;
acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned 
long), GFP_KERNEL);



[PATCH 3.16 133/305] powerpc: Use privileged SPR number for MMCR2

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Thomas Huth 

commit 8dd75ccb571f3c92c48014b3dabd3d51a115ab41 upstream.

We are already using the privileged versions of MMCR0, MMCR1
and MMCRA in the kernel, so for MMCR2, we should better use
the privileged versions, too, to be consistent.

Fixes: 240686c13687 ("powerpc: Initialise PMU related regs on Power8")
Suggested-by: Paul Mackerras 
Signed-off-by: Thomas Huth 
Acked-by: Paul Mackerras 
Signed-off-by: Michael Ellerman 
Signed-off-by: Ben Hutchings 
---
 arch/powerpc/include/asm/reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -704,7 +704,7 @@
 #define   MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */
 #define   MMCR0_FCHV   0x00000001UL /* freeze conditions in hypervisor mode */
 #define SPRN_MMCR1 798
-#define SPRN_MMCR2 769
+#define SPRN_MMCR2 785
 #define SPRN_MMCRA 0x312
 #define   MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
 #define   MMCRA_SDAR_DCACHE_MISS 0x40000000UL



[PATCH 3.16 133/305] powerpc: Use privileged SPR number for MMCR2

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Thomas Huth 

commit 8dd75ccb571f3c92c48014b3dabd3d51a115ab41 upstream.

We are already using the privileged versions of MMCR0, MMCR1
and MMCRA in the kernel, so for MMCR2, we should better use
the privileged versions, too, to be consistent.

Fixes: 240686c13687 ("powerpc: Initialise PMU related regs on Power8")
Suggested-by: Paul Mackerras 
Signed-off-by: Thomas Huth 
Acked-by: Paul Mackerras 
Signed-off-by: Michael Ellerman 
Signed-off-by: Ben Hutchings 
---
 arch/powerpc/include/asm/reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -704,7 +704,7 @@
 #define   MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */
 #define   MMCR0_FCHV   0x00000001UL /* freeze conditions in hypervisor mode */
 #define SPRN_MMCR1 798
-#define SPRN_MMCR2 769
+#define SPRN_MMCR2 785
 #define SPRN_MMCRA 0x312
 #define   MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
 #define   MMCRA_SDAR_DCACHE_MISS 0x40000000UL



[PATCH 3.2 28/94] fs/cifs: correctly to anonymous authentication for the NTLM(v2) authentication

2016-08-13 Thread Ben Hutchings
3.2.82-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Stefan Metzmacher 

commit 1a967d6c9b39c226be1b45f13acd4d8a5ab3dc44 upstream.

Only servers which map unknown users to guest will allow
access using a non-null NTLMv2_Response.

For Samba it's the "map to guest = bad user" option.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11913

Signed-off-by: Stefan Metzmacher 
Signed-off-by: Steve French 
[bwh: Backported to 3.2:
 - Adjust context, indentation
 - Keep using cERROR()]
Signed-off-by: Ben Hutchings 
---
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -727,21 +727,26 @@ ssetup_ntlmssp_authenticate:
/* LM2 password would be here if we supported it */
pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
 
-   /* calculate nlmv2 response and session key */
-   rc = setup_ntlmv2_rsp(ses, nls_cp);
-   if (rc) {
-   cERROR(1, "Error %d during NTLMv2 authentication", rc);
-   goto ssetup_exit;
-   }
-   memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
-   ses->auth_key.len - CIFS_SESS_KEY_SIZE);
-   bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+   if (ses->user_name != NULL) {
+   /* calculate nlmv2 response and session key */
+   rc = setup_ntlmv2_rsp(ses, nls_cp);
+   if (rc) {
+   cERROR(1, "Error %d during NTLMv2 
authentication", rc);
+   goto ssetup_exit;
+   }
+
+   memcpy(bcc_ptr, ses->auth_key.response + 
CIFS_SESS_KEY_SIZE,
+   ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+   bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
 
-   /* set case sensitive password length after tilen may get
-* assigned, tilen is 0 otherwise.
-*/
-   pSMB->req_no_secext.CaseSensitivePasswordLength =
-   cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+   /* set case sensitive password length after tilen may 
get
+* assigned, tilen is 0 otherwise.
+*/
+   pSMB->req_no_secext.CaseSensitivePasswordLength =
+   cpu_to_le16(ses->auth_key.len - 
CIFS_SESS_KEY_SIZE);
+   } else {
+   pSMB->req_no_secext.CaseSensitivePasswordLength = 0;
+   }
 
if (ses->capabilities & CAP_UNICODE) {
if (iov[0].iov_len % 2) {



[PATCH 3.16 259/305] x86/power/64: Fix kernel text mapping corruption during image restoration

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: "Rafael J. Wysocki" 

commit 65c0554b73c920023cc8998802e508b798113b46 upstream.

Logan Gunthorpe reports that hibernation stopped working reliably for
him after commit ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table
and rodata).

That turns out to be a consequence of a long-standing issue with the
64-bit image restoration code on x86, which is that the temporary
page tables set up by it to avoid page tables corruption when the
last bits of the image kernel's memory contents are copied into
their original page frames re-use the boot kernel's text mapping,
but that mapping may very well get corrupted just like any other
part of the page tables.  Of course, if that happens, the final
jump to the image kernel's entry point will go to nowhere.

The exact reason why commit ab76f7b4ab23 matters here is that it
sometimes causes a PMD of a large page to be split into PTEs
that are allocated dynamically and get corrupted during image
restoration as described above.

To fix that issue note that the code copying the last bits of the
image kernel's memory contents to the page frames occupied by them
previously doesn't use the kernel text mapping, because it runs from
a special page covered by the identity mapping set up for that code
from scratch.  Hence, the kernel text mapping is only needed before
that code starts to run and then it will only be used just for the
final jump to the image kernel's entry point.

Accordingly, the temporary page tables set up in swsusp_arch_resume()
on x86-64 need to contain the kernel text mapping too.  That mapping
is only going to be used for the final jump to the image kernel, so
it only needs to cover the image kernel's entry point, because the
first thing the image kernel does after getting control back is to
switch over to its own original page tables.  Moreover, the virtual
address of the image kernel's entry point in that mapping has to be
the same as the one mapped by the image kernel's page tables.

With that in mind, modify the x86-64's arch_hibernation_header_save()
and arch_hibernation_header_restore() routines to pass the physical
address of the image kernel's entry point (in addition to its virtual
address) to the boot kernel (a small piece of assembly code involved
in passing the entry point's virtual address to the image kernel is
not necessary any more after that, so drop it).  Update RESTORE_MAGIC
too to reflect the image header format change.

Next, in set_up_temporary_mappings(), use the physical and virtual
addresses of the image kernel's entry point passed in the image
header to set up a minimum kernel text mapping (using memory pages
that won't be overwritten by the image kernel's memory contents) that
will map those addresses to each other as appropriate.

This makes the concern about the possible corruption of the original
boot kernel text mapping go away and if the minimum kernel text
mapping used for the final jump marks the image kernel's entry point
memory as executable, the jump to it is guaranteed to succeed.

Fixes: ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata)
Link: http://marc.info/?l=linux-pm&m=146372852823760&w=2
Reported-by: Logan Gunthorpe 
Reported-and-tested-by: Borislav Petkov 
Tested-by: Kees Cook 
Signed-off-by: Rafael J. Wysocki 
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings 
---
 arch/x86/power/hibernate_64.c | 97 ++-
 arch/x86/power/hibernate_asm_64.S | 55 ++
 2 files changed, 109 insertions(+), 43 deletions(-)

--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Defined in hibernate_asm_64.S */
 extern asmlinkage __visible int restore_image(void);
@@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_
  * kernel's text (this value is passed in the image header).
  */
 unsigned long restore_jump_address __visible;
+unsigned long jump_address_phys;
 
 /*
  * Value of the cr3 register from before the hibernation (this value is passed
@@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible;
 
 pgd_t *temp_level4_pgt __visible;
 
-void *relocated_restore_code __visible;
+unsigned long relocated_restore_code __visible;
+
+static int set_up_temporary_text_mapping(void)
+{
+   pmd_t *pmd;
+   pud_t *pud;
+
+   /*
+* The new mapping only has to cover the page containing the image
+* kernel's entry point (jump_address_phys), because the switch over to
+* it is carried out by relocated code running from a page allocated
+* specifically for this purpose and covered by the identity mapping, so
+* the temporary kernel text mapping is only needed for the final jump.
+ 

[PATCH 3.2 28/94] fs/cifs: correctly to anonymous authentication for the NTLM(v2) authentication

2016-08-13 Thread Ben Hutchings
3.2.82-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Stefan Metzmacher 

commit 1a967d6c9b39c226be1b45f13acd4d8a5ab3dc44 upstream.

Only servers which map unknown users to guest will allow
access using a non-null NTLMv2_Response.

For Samba it's the "map to guest = bad user" option.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11913

Signed-off-by: Stefan Metzmacher 
Signed-off-by: Steve French 
[bwh: Backported to 3.2:
 - Adjust context, indentation
 - Keep using cERROR()]
Signed-off-by: Ben Hutchings 
---
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -727,21 +727,26 @@ ssetup_ntlmssp_authenticate:
/* LM2 password would be here if we supported it */
pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
 
-   /* calculate nlmv2 response and session key */
-   rc = setup_ntlmv2_rsp(ses, nls_cp);
-   if (rc) {
-   cERROR(1, "Error %d during NTLMv2 authentication", rc);
-   goto ssetup_exit;
-   }
-   memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
-   ses->auth_key.len - CIFS_SESS_KEY_SIZE);
-   bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+   if (ses->user_name != NULL) {
+   /* calculate nlmv2 response and session key */
+   rc = setup_ntlmv2_rsp(ses, nls_cp);
+   if (rc) {
+   cERROR(1, "Error %d during NTLMv2 
authentication", rc);
+   goto ssetup_exit;
+   }
+
+   memcpy(bcc_ptr, ses->auth_key.response + 
CIFS_SESS_KEY_SIZE,
+   ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+   bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
 
-   /* set case sensitive password length after tilen may get
-* assigned, tilen is 0 otherwise.
-*/
-   pSMB->req_no_secext.CaseSensitivePasswordLength =
-   cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+   /* set case sensitive password length after tilen may 
get
+* assigned, tilen is 0 otherwise.
+*/
+   pSMB->req_no_secext.CaseSensitivePasswordLength =
+   cpu_to_le16(ses->auth_key.len - 
CIFS_SESS_KEY_SIZE);
+   } else {
+   pSMB->req_no_secext.CaseSensitivePasswordLength = 0;
+   }
 
if (ses->capabilities & CAP_UNICODE) {
if (iov[0].iov_len % 2) {



[PATCH 3.16 259/305] x86/power/64: Fix kernel text mapping corruption during image restoration

2016-08-13 Thread Ben Hutchings
3.16.37-rc1 review patch.  If anyone has any objections, please let me know.

--

From: "Rafael J. Wysocki" 

commit 65c0554b73c920023cc8998802e508b798113b46 upstream.

Logan Gunthorpe reports that hibernation stopped working reliably for
him after commit ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table
and rodata).

That turns out to be a consequence of a long-standing issue with the
64-bit image restoration code on x86, which is that the temporary
page tables set up by it to avoid page tables corruption when the
last bits of the image kernel's memory contents are copied into
their original page frames re-use the boot kernel's text mapping,
but that mapping may very well get corrupted just like any other
part of the page tables.  Of course, if that happens, the final
jump to the image kernel's entry point will go to nowhere.

The exact reason why commit ab76f7b4ab23 matters here is that it
sometimes causes a PMD of a large page to be split into PTEs
that are allocated dynamically and get corrupted during image
restoration as described above.

To fix that issue note that the code copying the last bits of the
image kernel's memory contents to the page frames occupied by them
previously doesn't use the kernel text mapping, because it runs from
a special page covered by the identity mapping set up for that code
from scratch.  Hence, the kernel text mapping is only needed before
that code starts to run and then it will only be used just for the
final jump to the image kernel's entry point.

Accordingly, the temporary page tables set up in swsusp_arch_resume()
on x86-64 need to contain the kernel text mapping too.  That mapping
is only going to be used for the final jump to the image kernel, so
it only needs to cover the image kernel's entry point, because the
first thing the image kernel does after getting control back is to
switch over to its own original page tables.  Moreover, the virtual
address of the image kernel's entry point in that mapping has to be
the same as the one mapped by the image kernel's page tables.

With that in mind, modify the x86-64's arch_hibernation_header_save()
and arch_hibernation_header_restore() routines to pass the physical
address of the image kernel's entry point (in addition to its virtual
address) to the boot kernel (a small piece of assembly code involved
in passing the entry point's virtual address to the image kernel is
not necessary any more after that, so drop it).  Update RESTORE_MAGIC
too to reflect the image header format change.

Next, in set_up_temporary_mappings(), use the physical and virtual
addresses of the image kernel's entry point passed in the image
header to set up a minimum kernel text mapping (using memory pages
that won't be overwritten by the image kernel's memory contents) that
will map those addresses to each other as appropriate.

This makes the concern about the possible corruption of the original
boot kernel text mapping go away and if the minimum kernel text
mapping used for the final jump marks the image kernel's entry point
memory as executable, the jump to it is guaranteed to succeed.

Fixes: ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata)
Link: http://marc.info/?l=linux-pm&m=146372852823760&w=2
Reported-by: Logan Gunthorpe 
Reported-and-tested-by: Borislav Petkov 
Tested-by: Kees Cook 
Signed-off-by: Rafael J. Wysocki 
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings 
---
 arch/x86/power/hibernate_64.c | 97 ++-
 arch/x86/power/hibernate_asm_64.S | 55 ++
 2 files changed, 109 insertions(+), 43 deletions(-)

--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Defined in hibernate_asm_64.S */
 extern asmlinkage __visible int restore_image(void);
@@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_
  * kernel's text (this value is passed in the image header).
  */
 unsigned long restore_jump_address __visible;
+unsigned long jump_address_phys;
 
 /*
  * Value of the cr3 register from before the hibernation (this value is passed
@@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible;
 
 pgd_t *temp_level4_pgt __visible;
 
-void *relocated_restore_code __visible;
+unsigned long relocated_restore_code __visible;
+
+static int set_up_temporary_text_mapping(void)
+{
+   pmd_t *pmd;
+   pud_t *pud;
+
+   /*
+* The new mapping only has to cover the page containing the image
+* kernel's entry point (jump_address_phys), because the switch over to
+* it is carried out by relocated code running from a page allocated
+* specifically for this purpose and covered by the identity mapping, so
+* the temporary kernel text mapping is only needed for the final jump.
+* Moreover, in that mapping the virtual address of the image kernel's
+* entry point must be the same as its virtual 

[PATCH] pinctrl: meson: get rid of unneeded domain structures

2016-08-13 Thread Beniamino Galvani
The driver originally supported more domains (register ranges) per
pinctrl device, but since commit 9dab1868ec0d ("pinctrl: amlogic: Make
driver independent from two-domain configuration") each device gets
assigned a single domain and we instantiate multiple pinctrl devices
in the DT.

Therefore, now the 'meson_domain' and 'meson_domain_data' structures
don't have any reason to exist and can be removed to make the model
simpler to understand. This commit doesn't change behavior.

Tested on an Odroid-C2.

Signed-off-by: Beniamino Galvani 
---
 drivers/pinctrl/meson/pinctrl-meson-gxbb.c |  24 +---
 drivers/pinctrl/meson/pinctrl-meson.c  | 222 +++--
 drivers/pinctrl/meson/pinctrl-meson.h  |  52 ++-
 drivers/pinctrl/meson/pinctrl-meson8.c |  24 +---
 drivers/pinctrl/meson/pinctrl-meson8b.c|  24 +---
 5 files changed, 118 insertions(+), 228 deletions(-)

diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c 
b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
index cb4d6ad..233c9c8 100644
--- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
+++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
@@ -556,38 +556,28 @@ static struct meson_bank meson_gxbb_aobus_banks[] = {
BANK("AO",   PIN(GPIOAO_0, 0),  PIN(GPIOAO_13, 0), 0,  0,  0, 16,  0,  
0,  0, 16,  1,  0),
 };
 
-static struct meson_domain_data meson_gxbb_periphs_domain_data = {
+struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = {
.name   = "periphs-banks",
-   .banks  = meson_gxbb_periphs_banks,
-   .num_banks  = ARRAY_SIZE(meson_gxbb_periphs_banks),
.pin_base   = 14,
-   .num_pins   = 120,
-};
-
-static struct meson_domain_data meson_gxbb_aobus_domain_data = {
-   .name   = "aobus-banks",
-   .banks  = meson_gxbb_aobus_banks,
-   .num_banks  = ARRAY_SIZE(meson_gxbb_aobus_banks),
-   .pin_base   = 0,
-   .num_pins   = 14,
-};
-
-struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = {
.pins   = meson_gxbb_periphs_pins,
.groups = meson_gxbb_periphs_groups,
.funcs  = meson_gxbb_periphs_functions,
-   .domain_data= &meson_gxbb_periphs_domain_data,
+   .banks  = meson_gxbb_periphs_banks,
.num_pins   = ARRAY_SIZE(meson_gxbb_periphs_pins),
.num_groups = ARRAY_SIZE(meson_gxbb_periphs_groups),
.num_funcs  = ARRAY_SIZE(meson_gxbb_periphs_functions),
+   .num_banks  = ARRAY_SIZE(meson_gxbb_periphs_banks),
 };
 
 struct meson_pinctrl_data meson_gxbb_aobus_pinctrl_data = {
+   .name   = "aobus-banks",
+   .pin_base   = 0,
.pins   = meson_gxbb_aobus_pins,
.groups = meson_gxbb_aobus_groups,
.funcs  = meson_gxbb_aobus_functions,
-   .domain_data= &meson_gxbb_aobus_domain_data,
+   .banks  = meson_gxbb_aobus_banks,
.num_pins   = ARRAY_SIZE(meson_gxbb_aobus_pins),
.num_groups = ARRAY_SIZE(meson_gxbb_aobus_groups),
.num_funcs  = ARRAY_SIZE(meson_gxbb_aobus_functions),
+   .num_banks  = ARRAY_SIZE(meson_gxbb_aobus_banks),
 };
diff --git a/drivers/pinctrl/meson/pinctrl-meson.c 
b/drivers/pinctrl/meson/pinctrl-meson.c
index 11623c6..9678599 100644
--- a/drivers/pinctrl/meson/pinctrl-meson.c
+++ b/drivers/pinctrl/meson/pinctrl-meson.c
@@ -21,9 +21,8 @@
  * domain which can't be powered off; the bank also uses a set of
  * registers different from the other banks.
  *
- * For each of the two power domains (regular and always-on) there are
- * 4 different register ranges that control the following properties
- * of the pins:
+ * For each pin controller there are 4 different register ranges that
+ * control the following properties of the pins:
  *  1) pin muxing
  *  2) pull enable/disable
  *  3) pull up/down
@@ -33,8 +32,8 @@
  * direction are the same and thus there are only 3 register ranges.
  *
  * Every pinmux group can be enabled by a specific bit in the first
- * register range of the domain; when all groups for a given pin are
- * disabled the pin acts as a GPIO.
+ * register range; when all groups for a given pin are disabled the
+ * pin acts as a GPIO.
  *
  * For the pull and GPIO configuration every bank uses a contiguous
  * set of bits in the register sets described above; the same register
@@ -66,21 +65,21 @@
 /**
  * meson_get_bank() - find the bank containing a given pin
  *
- * @domain:the domain containing the pin
+ * @pc:the pinctrl instance
  * @pin:   the pin number
  * @bank:  the found bank
  *
  * Return: 0 on success, a negative value on error
  */
-static int meson_get_bank(struct meson_domain *domain, unsigned int pin,
+static int meson_get_bank(struct meson_pinctrl *pc, unsigned int pin,
  struct meson_bank **bank)
 {
int i;
 
-   for (i = 0; i < domain->data->num_banks; i++) {
-

[PATCH] pinctrl: meson: get rid of unneeded domain structures

2016-08-13 Thread Beniamino Galvani
The driver originally supported more domains (register ranges) per
pinctrl device, but since commit 9dab1868ec0d ("pinctrl: amlogic: Make
driver independent from two-domain configuration") each device gets
assigned a single domain and we instantiate multiple pinctrl devices
in the DT.

Therefore, now the 'meson_domain' and 'meson_domain_data' structures
don't have any reason to exist and can be removed to make the model
simpler to understand. This commit doesn't change behavior.

Tested on an Odroid-C2.

Signed-off-by: Beniamino Galvani 
---
 drivers/pinctrl/meson/pinctrl-meson-gxbb.c |  24 +---
 drivers/pinctrl/meson/pinctrl-meson.c  | 222 +++--
 drivers/pinctrl/meson/pinctrl-meson.h  |  52 ++-
 drivers/pinctrl/meson/pinctrl-meson8.c |  24 +---
 drivers/pinctrl/meson/pinctrl-meson8b.c|  24 +---
 5 files changed, 118 insertions(+), 228 deletions(-)

diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c 
b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
index cb4d6ad..233c9c8 100644
--- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
+++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
@@ -556,38 +556,28 @@ static struct meson_bank meson_gxbb_aobus_banks[] = {
BANK("AO",   PIN(GPIOAO_0, 0),  PIN(GPIOAO_13, 0), 0,  0,  0, 16,  0,  
0,  0, 16,  1,  0),
 };
 
-static struct meson_domain_data meson_gxbb_periphs_domain_data = {
+struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = {
.name   = "periphs-banks",
-   .banks  = meson_gxbb_periphs_banks,
-   .num_banks  = ARRAY_SIZE(meson_gxbb_periphs_banks),
.pin_base   = 14,
-   .num_pins   = 120,
-};
-
-static struct meson_domain_data meson_gxbb_aobus_domain_data = {
-   .name   = "aobus-banks",
-   .banks  = meson_gxbb_aobus_banks,
-   .num_banks  = ARRAY_SIZE(meson_gxbb_aobus_banks),
-   .pin_base   = 0,
-   .num_pins   = 14,
-};
-
-struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = {
.pins   = meson_gxbb_periphs_pins,
.groups = meson_gxbb_periphs_groups,
.funcs  = meson_gxbb_periphs_functions,
-   .domain_data= &meson_gxbb_periphs_domain_data,
+   .banks  = meson_gxbb_periphs_banks,
.num_pins   = ARRAY_SIZE(meson_gxbb_periphs_pins),
.num_groups = ARRAY_SIZE(meson_gxbb_periphs_groups),
.num_funcs  = ARRAY_SIZE(meson_gxbb_periphs_functions),
+   .num_banks  = ARRAY_SIZE(meson_gxbb_periphs_banks),
 };
 
 struct meson_pinctrl_data meson_gxbb_aobus_pinctrl_data = {
+   .name   = "aobus-banks",
+   .pin_base   = 0,
.pins   = meson_gxbb_aobus_pins,
.groups = meson_gxbb_aobus_groups,
.funcs  = meson_gxbb_aobus_functions,
-   .domain_data= &meson_gxbb_aobus_domain_data,
+   .banks  = meson_gxbb_aobus_banks,
.num_pins   = ARRAY_SIZE(meson_gxbb_aobus_pins),
.num_groups = ARRAY_SIZE(meson_gxbb_aobus_groups),
.num_funcs  = ARRAY_SIZE(meson_gxbb_aobus_functions),
+   .num_banks  = ARRAY_SIZE(meson_gxbb_aobus_banks),
 };
diff --git a/drivers/pinctrl/meson/pinctrl-meson.c 
b/drivers/pinctrl/meson/pinctrl-meson.c
index 11623c6..9678599 100644
--- a/drivers/pinctrl/meson/pinctrl-meson.c
+++ b/drivers/pinctrl/meson/pinctrl-meson.c
@@ -21,9 +21,8 @@
  * domain which can't be powered off; the bank also uses a set of
  * registers different from the other banks.
  *
- * For each of the two power domains (regular and always-on) there are
- * 4 different register ranges that control the following properties
- * of the pins:
+ * For each pin controller there are 4 different register ranges that
+ * control the following properties of the pins:
  *  1) pin muxing
  *  2) pull enable/disable
  *  3) pull up/down
@@ -33,8 +32,8 @@
  * direction are the same and thus there are only 3 register ranges.
  *
  * Every pinmux group can be enabled by a specific bit in the first
- * register range of the domain; when all groups for a given pin are
- * disabled the pin acts as a GPIO.
+ * register range; when all groups for a given pin are disabled the
+ * pin acts as a GPIO.
  *
  * For the pull and GPIO configuration every bank uses a contiguous
  * set of bits in the register sets described above; the same register
@@ -66,21 +65,21 @@
 /**
  * meson_get_bank() - find the bank containing a given pin
  *
- * @domain:the domain containing the pin
+ * @pc:the pinctrl instance
  * @pin:   the pin number
  * @bank:  the found bank
  *
  * Return: 0 on success, a negative value on error
  */
-static int meson_get_bank(struct meson_domain *domain, unsigned int pin,
+static int meson_get_bank(struct meson_pinctrl *pc, unsigned int pin,
  struct meson_bank **bank)
 {
int i;
 
-   for (i = 0; i < domain->data->num_banks; i++) {
-   if (pin 

Re: [dm-devel] [RFC PATCH 2/2] mm, mempool: do not throttle PF_LESS_THROTTLE tasks

2016-08-13 Thread Mikulas Patocka


On Fri, 12 Aug 2016, Michal Hocko wrote:

> On Thu 04-08-16 14:49:41, Mikulas Patocka wrote:
> 
> > On Wed, 3 Aug 2016, Michal Hocko wrote:
> > 
> > > But the device congestion is not the only condition required for the
> > > throttling. The pgdat has also be marked congested which means that the
> > > LRU page scanner bumped into dirty/writeback/pg_reclaim pages at the
> > > tail of the LRU. That should only happen if we are rotating LRUs too
> > > quickly. AFAIU the reclaim shouldn't allow free ticket scanning in that
> > > situation.
> > 
> > The obvious problem here is that mempool allocations should sleep in 
> > > mempool_alloc() on &pool->wait (until someone returns some entries into 
> > the mempool), they should not sleep inside the page allocator.
> 
> I agree that mempool_alloc should _primarily_ sleep on their own
> throttling mechanism. I am not questioning that. I am just saying that
> the page allocator has its own throttling which it relies on and that
> cannot be just ignored because that might have other undesirable side
> effects. So if the right approach is really to never throttle certain
> requests then we have to bail out from a congested nodes/zones as soon
> as the congestion is detected.
> 
> Now, I would like to see that something like that is _really_ necessary.

Currently, it is not a problem - device mapper reports the device as 
congested only if the underlying physical disks are congested.

But once we change it so that device mapper reports congested state on its 
own (when it has too many bios in progress), this starts being a problem.

I would add PF_NO_THROTTLE or __GFP_NO_THROTTLE to mempool_alloc.

Or - we can prevent the memory reclaim from throttling if we see both 
__GFP_NOMEMALLOC and __GFP_NORETRY - that would be sufficient to detect 
mempool_alloc usage and it wouldn't hurt other __GFP_NORETRY users.

Mikulas

> I believe that we should simply start with easier part and get rid of
> throttle_vm_writeout because that seems like a left over from the past.
> If that turns out unsatisfactory and we have clear picture when the
> throttling is harmful/suboptimal then we can move on with a more complex
> solution. Does this sound like a way forward?
> 
> -- 
> Michal Hocko
> SUSE Labs


Re: [dm-devel] [RFC PATCH 2/2] mm, mempool: do not throttle PF_LESS_THROTTLE tasks

2016-08-13 Thread Mikulas Patocka


On Fri, 12 Aug 2016, Michal Hocko wrote:

> On Thu 04-08-16 14:49:41, Mikulas Patocka wrote:
> 
> > On Wed, 3 Aug 2016, Michal Hocko wrote:
> > 
> > > But the device congestion is not the only condition required for the
> > > throttling. The pgdat has also be marked congested which means that the
> > > LRU page scanner bumped into dirty/writeback/pg_reclaim pages at the
> > > tail of the LRU. That should only happen if we are rotating LRUs too
> > > quickly. AFAIU the reclaim shouldn't allow free ticket scanning in that
> > > situation.
> > 
> > The obvious problem here is that mempool allocations should sleep in 
> > > mempool_alloc() on &pool->wait (until someone returns some entries into 
> > the mempool), they should not sleep inside the page allocator.
> 
> I agree that mempool_alloc should _primarily_ sleep on their own
> throttling mechanism. I am not questioning that. I am just saying that
> the page allocator has its own throttling which it relies on and that
> cannot be just ignored because that might have other undesirable side
> effects. So if the right approach is really to never throttle certain
> requests then we have to bail out from a congested nodes/zones as soon
> as the congestion is detected.
> 
> Now, I would like to see that something like that is _really_ necessary.

Currently, it is not a problem - device mapper reports the device as 
congested only if the underlying physical disks are congested.

But once we change it so that device mapper reports congested state on its 
own (when it has too many bios in progress), this starts being a problem.

I would add PF_NO_THROTTLE or __GFP_NO_THROTTLE to mempool_alloc.

Or - we can prevent the memory reclaim from throttling if we see both 
__GFP_NOMEMALLOC and __GFP_NORETRY - that would be sufficient to detect 
mempool_alloc usage and it wouldn't hurt other __GFP_NORETRY users.

Mikulas

> I believe that we should simply start with easier part and get rid of
> throttle_vm_writeout because that seems like a left over from the past.
> If that turns out unsatisfactory and we have clear picture when the
> throttling is harmful/suboptimal then we can move on with a more complex
> solution. Does this sound like a way forward?
> 
> -- 
> Michal Hocko
> SUSE Labs


Re: staging: ks7010: Replace three printk() calls by pr_err()

2016-08-13 Thread Joe Perches
On Sat, 2016-08-13 at 13:10 +0200, SF Markus Elfring wrote:
> > > Prefer usage of the macro "pr_err" over the interface "printk".
> > Not correct
> A checkpatch warning like "PREFER_PR_LEVEL" can point additional 
> possibilities out
> for this use case.
> Would you like to introduce any of the higher level logging functions instead?

I think pr_<level> is OK if reworking the code
to support dev_<level> is not easy.

> > > diff --git a/drivers/staging/ks7010/ks7010_sdio.c 
> > > b/drivers/staging/ks7010/ks7010_sdio.c
> > []
> > > 
> > > @@ -998,11 +998,11 @@ static int ks7010_sdio_probe(struct sdio_func *func,
> > >   /* private memory allocate */
> > >   netdev = alloc_etherdev(sizeof(*priv));
> > >   if (netdev == NULL) {
> > > - printk(KERN_ERR "ks7010 : Unable to alloc new net device\n");
> > > + pr_err(pr_fmt("Unable to alloc new net device\n"));
> > All of these pr_fmt uses are redundant as pr_err already does pr_fmt
> Thanks for your reminder.
> 
> Would you accept that another update will be appended to the discussed patch 
> series?

No.  Patches should not knowingly introduce defects
that are corrected in follow-on patches.

> > alloc_etherdev already does a dump_stack so the OOM isn't useful.
> Does this information indicate that this printk() (or pr_err()) call
> should be deleted?

Markus, I don't know if it's your lack of English
comprehension or not, but it's fairly obvious from
my reply that this line should be deleted, either
in this patch or a follow-on.



Re: staging: ks7010: Replace three printk() calls by pr_err()

2016-08-13 Thread Joe Perches
On Sat, 2016-08-13 at 13:10 +0200, SF Markus Elfring wrote:
> > > Prefer usage of the macro "pr_err" over the interface "printk".
> > Not correct
> A checkpatch warning like "PREFER_PR_LEVEL" can point additional 
> possibilities out
> for this use case.
> Would you like to introduce any of the higher level logging functions instead?

I think pr_<level> is OK if reworking the code
to support dev_<level> is not easy.

> > > diff --git a/drivers/staging/ks7010/ks7010_sdio.c 
> > > b/drivers/staging/ks7010/ks7010_sdio.c
> > []
> > > 
> > > @@ -998,11 +998,11 @@ static int ks7010_sdio_probe(struct sdio_func *func,
> > >   /* private memory allocate */
> > >   netdev = alloc_etherdev(sizeof(*priv));
> > >   if (netdev == NULL) {
> > > - printk(KERN_ERR "ks7010 : Unable to alloc new net device\n");
> > > + pr_err(pr_fmt("Unable to alloc new net device\n"));
> > All of these pr_fmt uses are redundant as pr_err already does pr_fmt
> Thanks for your reminder.
> 
> Would you accept that another update will be appended to the discussed patch 
> series?

No.  Patches should not knowingly introduce defects
that are corrected in follow-on patches.

> > alloc_etherdev already does a dump_stack so the OOM isn't useful.
> Does this information indicate that this printk() (or pr_err()) call
> should be deleted?

Markus, I don't know if it's your lack of English
comprehension or not, but it's fairly obvious from
my reply that this line should be deleted, either
in this patch or a follow-on.



Re: [PATCH v3 0/7] x86: Rewrite switch_to()

2016-08-13 Thread Linus Torvalds
On Sat, Aug 13, 2016 at 9:38 AM, Brian Gerst  wrote:
> This patch set simplifies the switch_to() code, by moving the stack switch
> code out of line into an asm stub before calling __switch_to().  This ends
> up being more readable, and using the C calling convention instead of
> clobbering all registers improves code generation.  It also allows newly
> forked processes to construct a special stack frame to seamlessly flow
> to ret_from_fork, instead of using a test and branch, or an unbalanced
> call/ret.

Do you have performance numbers? Is it noticeable/measurable?

 Linus


Re: [PATCH v3 0/7] x86: Rewrite switch_to()

2016-08-13 Thread Linus Torvalds
On Sat, Aug 13, 2016 at 9:38 AM, Brian Gerst  wrote:
> This patch set simplifies the switch_to() code, by moving the stack switch
> code out of line into an asm stub before calling __switch_to().  This ends
> up being more readable, and using the C calling convention instead of
> clobbering all registers improves code generation.  It also allows newly
> forked processes to construct a special stack frame to seamlessly flow
> to ret_from_fork, instead of using a test and branch, or an unbalanced
> call/ret.

Do you have performance numbers? Is it noticeable/measurable?

 Linus


[PATCH 1/3] befs: fix typos in datastream.c

2016-08-13 Thread Luis de Bethencourt
Signed-off-by: Luis de Bethencourt 
---

Hi,

This is a series of patches fixing small issues in datastream.c.

I am in the process of doing the same for the rest of the files, in order to
finish the cleanup and start adding documentation and new features.

Thanks,
Luis

 fs/befs/datastream.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index 6889644..b2eb5b5 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -37,7 +37,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb,
 /**
  * befs_read_datastream - get buffer_head containing data, starting from pos.
  * @sb: Filesystem superblock
- * @ds: datastrem to find data with
+ * @ds: datastream to find data with
  * @pos: start of data
  * @off: offset of data in buffer_head->b_data
  *
@@ -115,7 +115,7 @@ befs_fblock2brun(struct super_block *sb, const 
befs_data_stream *data,
 /**
  * befs_read_lsmylink - read long symlink from datastream.
  * @sb: Filesystem superblock 
- * @ds: Datastrem to read from
+ * @ds: Datastream to read from
  * @buff: Buffer in which to place long symlink data
  * @len: Length of the long symlink in bytes
  *
@@ -183,7 +183,7 @@ befs_count_blocks(struct super_block *sb, const 
befs_data_stream *ds)
metablocks += ds->indirect.len;
 
/*
-  Double indir block, plus all the indirect blocks it mapps
+  Double indir block, plus all the indirect blocks it maps.
   In the double-indirect range, all block runs of data are
   BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know 
   how many data block runs are in the double-indirect region,
@@ -397,7 +397,7 @@ befs_find_brun_indirect(struct super_block *sb,
though the double-indirect run may be several blocks long, 
we can calculate which of those blocks will contain the index
we are after and only read that one. We then follow it to 
-   the indirect block and perform a  similar process to find
+   the indirect block and perform a similar process to find
the actual block run that maps the data block we are interested
in.

-- 
2.5.1



[PATCH 1/3] befs: fix typos in datastream.c

2016-08-13 Thread Luis de Bethencourt
Signed-off-by: Luis de Bethencourt 
---

Hi,

This is a series of patches fixing small issues in datastream.c.

I am in the process of doing the same for the rest of the files, in order to
finish the cleanup and start adding documentation and new features.

Thanks,
Luis

 fs/befs/datastream.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index 6889644..b2eb5b5 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -37,7 +37,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb,
 /**
  * befs_read_datastream - get buffer_head containing data, starting from pos.
  * @sb: Filesystem superblock
- * @ds: datastrem to find data with
+ * @ds: datastream to find data with
  * @pos: start of data
  * @off: offset of data in buffer_head->b_data
  *
@@ -115,7 +115,7 @@ befs_fblock2brun(struct super_block *sb, const 
befs_data_stream *data,
 /**
  * befs_read_lsmylink - read long symlink from datastream.
  * @sb: Filesystem superblock 
- * @ds: Datastrem to read from
+ * @ds: Datastream to read from
  * @buff: Buffer in which to place long symlink data
  * @len: Length of the long symlink in bytes
  *
@@ -183,7 +183,7 @@ befs_count_blocks(struct super_block *sb, const 
befs_data_stream *ds)
metablocks += ds->indirect.len;
 
/*
-  Double indir block, plus all the indirect blocks it mapps
+  Double indir block, plus all the indirect blocks it maps.
   In the double-indirect range, all block runs of data are
   BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know 
   how many data block runs are in the double-indirect region,
@@ -397,7 +397,7 @@ befs_find_brun_indirect(struct super_block *sb,
though the double-indirect run may be several blocks long, 
we can calculate which of those blocks will contain the index
we are after and only read that one. We then follow it to 
-   the indirect block and perform a  similar process to find
+   the indirect block and perform a similar process to find
the actual block run that maps the data block we are interested
in.

-- 
2.5.1



[PATCH 2/3] befs: improve documentation in datastream.c

2016-08-13 Thread Luis de Bethencourt
Convert function descriptions to kernel-doc style.

Signed-off-by: Luis de Bethencourt 
---
 fs/befs/datastream.c | 193 ++-
 1 file changed, 98 insertions(+), 95 deletions(-)

diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index b2eb5b5..5ce85cf 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -75,7 +75,13 @@ befs_read_datastream(struct super_block *sb, const 
befs_data_stream *ds,
return bh;
 }
 
-/*
+/**
+ * befs_fblock2brun - give back block run for fblock
+ * @sb: the superblock
+ * @data: datastream to read from
+ * @fblock: the blocknumber with the file position to find
+ * @run: The found run is passed back through this pointer
+ *
  * Takes a file position and gives back a brun who's starting block
  * is block number fblock of the file.
  * 
@@ -212,36 +218,35 @@ befs_count_blocks(struct super_block *sb, const 
befs_data_stream *ds)
return blocks;
 }
 
-/*
-   Finds the block run that starts at file block number blockno
-   in the file represented by the datastream data, if that 
-   blockno is in the direct region of the datastream.
-   
-   sb: the superblock
-   data: the datastream
-   blockno: the blocknumber to find
-   run: The found run is passed back through this pointer
-   
-   Return value is BEFS_OK if the blockrun is found, BEFS_ERR
-   otherwise.
-   
-   Algorithm:
-   Linear search. Checks each element of array[] to see if it
-   contains the blockno-th filesystem block. This is necessary
-   because the block runs map variable amounts of data. Simply
-   keeps a count of the number of blocks searched so far (sum),
-   incrementing this by the length of each block run as we come
-   across it. Adds sum to *count before returning (this is so
-   you can search multiple arrays that are logicaly one array,
-   as in the indirect region code).
-   
-   When/if blockno is found, if blockno is inside of a block 
-   run as stored on disk, we offset the start and length members
-   of the block run, so that blockno is the start and len is
-   still valid (the run ends in the same place).
-   
-   2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_direct - find a direct block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the direct region of the datastream.
+ *
+ * Return value is BEFS_OK if the blockrun is found, BEFS_ERR
+ * otherwise.
+ *
+ * Algorithm:
+ * Linear search. Checks each element of array[] to see if it
+ * contains the blockno-th filesystem block. This is necessary
+ * because the block runs map variable amounts of data. Simply
+ * keeps a count of the number of blocks searched so far (sum),
+ * incrementing this by the length of each block run as we come
+ * across it. Adds sum to *count before returning (this is so
+ * you can search multiple arrays that are logicaly one array,
+ * as in the indirect region code).
+ *
+ * When/if blockno is found, if blockno is inside of a block
+ * run as stored on disk, we offset the start and length members
+ * of the block run, so that blockno is the start and len is
+ * still valid (the run ends in the same place).
+ */
 static int
 befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data,
  befs_blocknr_t blockno, befs_block_run * run)
@@ -273,29 +278,28 @@ befs_find_brun_direct(struct super_block *sb, const 
befs_data_stream *data,
return BEFS_ERR;
 }
 
-/*
-   Finds the block run that starts at file block number blockno
-   in the file represented by the datastream data, if that 
-   blockno is in the indirect region of the datastream.
-   
-   sb: the superblock
-   data: the datastream
-   blockno: the blocknumber to find
-   run: The found run is passed back through this pointer
-   
-   Return value is BEFS_OK if the blockrun is found, BEFS_ERR
-   otherwise.
-   
-   Algorithm:
-   For each block in the indirect run of the datastream, read
-   it in and search through it for search_blk.
-   
-   XXX:
-   Really should check to make sure blockno is inside indirect
-   region.
-   
-   2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_indirect - find a block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the indirect region of the datastream.
+ *
+ 

[PATCH 2/3] befs: improve documentation in datastream.c

2016-08-13 Thread Luis de Bethencourt
Convert function descriptions to kernel-doc style.

Signed-off-by: Luis de Bethencourt 
---
 fs/befs/datastream.c | 193 ++-
 1 file changed, 98 insertions(+), 95 deletions(-)

diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index b2eb5b5..5ce85cf 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -75,7 +75,13 @@ befs_read_datastream(struct super_block *sb, const 
befs_data_stream *ds,
return bh;
 }
 
-/*
+/**
+ * befs_fblock2brun - give back block run for fblock
+ * @sb: the superblock
+ * @data: datastream to read from
+ * @fblock: the blocknumber with the file position to find
+ * @run: The found run is passed back through this pointer
+ *
  * Takes a file position and gives back a brun who's starting block
  * is block number fblock of the file.
  * 
@@ -212,36 +218,35 @@ befs_count_blocks(struct super_block *sb, const 
befs_data_stream *ds)
return blocks;
 }
 
-/*
-   Finds the block run that starts at file block number blockno
-   in the file represented by the datastream data, if that 
-   blockno is in the direct region of the datastream.
-   
-   sb: the superblock
-   data: the datastream
-   blockno: the blocknumber to find
-   run: The found run is passed back through this pointer
-   
-   Return value is BEFS_OK if the blockrun is found, BEFS_ERR
-   otherwise.
-   
-   Algorithm:
-   Linear search. Checks each element of array[] to see if it
-   contains the blockno-th filesystem block. This is necessary
-   because the block runs map variable amounts of data. Simply
-   keeps a count of the number of blocks searched so far (sum),
-   incrementing this by the length of each block run as we come
-   across it. Adds sum to *count before returning (this is so
-   you can search multiple arrays that are logicaly one array,
-   as in the indirect region code).
-   
-   When/if blockno is found, if blockno is inside of a block 
-   run as stored on disk, we offset the start and length members
-   of the block run, so that blockno is the start and len is
-   still valid (the run ends in the same place).
-   
-   2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_direct - find a direct block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the direct region of the datastream.
+ *
+ * Return value is BEFS_OK if the blockrun is found, BEFS_ERR
+ * otherwise.
+ *
+ * Algorithm:
+ * Linear search. Checks each element of array[] to see if it
+ * contains the blockno-th filesystem block. This is necessary
+ * because the block runs map variable amounts of data. Simply
+ * keeps a count of the number of blocks searched so far (sum),
+ * incrementing this by the length of each block run as we come
+ * across it. Adds sum to *count before returning (this is so
+ * you can search multiple arrays that are logicaly one array,
+ * as in the indirect region code).
+ *
+ * When/if blockno is found, if blockno is inside of a block
+ * run as stored on disk, we offset the start and length members
+ * of the block run, so that blockno is the start and len is
+ * still valid (the run ends in the same place).
+ */
 static int
 befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data,
  befs_blocknr_t blockno, befs_block_run * run)
@@ -273,29 +278,28 @@ befs_find_brun_direct(struct super_block *sb, const 
befs_data_stream *data,
return BEFS_ERR;
 }
 
-/*
-   Finds the block run that starts at file block number blockno
-   in the file represented by the datastream data, if that 
-   blockno is in the indirect region of the datastream.
-   
-   sb: the superblock
-   data: the datastream
-   blockno: the blocknumber to find
-   run: The found run is passed back through this pointer
-   
-   Return value is BEFS_OK if the blockrun is found, BEFS_ERR
-   otherwise.
-   
-   Algorithm:
-   For each block in the indirect run of the datastream, read
-   it in and search through it for search_blk.
-   
-   XXX:
-   Really should check to make sure blockno is inside indirect
-   region.
-   
-   2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_indirect - find a block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the indirect region of the datastream.
+ *
+ * Return value is 

[PATCH 3/3] befs: befs: fix style issues in datastream.c

2016-08-13 Thread Luis de Bethencourt
Fixing the following checkpatch.pl errors:

ERROR: "foo * bar" should be "foo *bar"
+befs_blocknr_t blockno, befs_block_run * run);

WARNING: Missing a blank line after declarations
+   struct buffer_head *bh;
+   befs_debug(sb, "---> %s length: %llu", __func__, len);

WARNING: Block comments use * on subsequent lines
+   /*
+  Double indir block, plus all the indirect blocks it maps.

(and other instances of these)

Signed-off-by: Luis de Bethencourt 
---
 fs/befs/datastream.c | 32 +---
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index 5ce85cf..b4c7ba0 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -22,17 +22,17 @@ const befs_inode_addr BAD_IADDR = { 0, 0, 0 };
 
 static int befs_find_brun_direct(struct super_block *sb,
 const befs_data_stream *data,
-befs_blocknr_t blockno, befs_block_run * run);
+befs_blocknr_t blockno, befs_block_run *run);
 
 static int befs_find_brun_indirect(struct super_block *sb,
   const befs_data_stream *data,
   befs_blocknr_t blockno,
-  befs_block_run * run);
+  befs_block_run *run);
 
 static int befs_find_brun_dblindirect(struct super_block *sb,
  const befs_data_stream *data,
  befs_blocknr_t blockno,
- befs_block_run * run);
+ befs_block_run *run);
 
 /**
  * befs_read_datastream - get buffer_head containing data, starting from pos.
@@ -46,7 +46,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb,
  */
 struct buffer_head *
 befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
-befs_off_t pos, uint * off)
+befs_off_t pos, uint *off)
 {
struct buffer_head *bh;
befs_block_run run;
@@ -94,7 +94,7 @@ befs_read_datastream(struct super_block *sb, const 
befs_data_stream *ds,
  */
 int
 befs_fblock2brun(struct super_block *sb, const befs_data_stream *data,
-befs_blocknr_t fblock, befs_block_run * run)
+befs_blocknr_t fblock, befs_block_run *run)
 {
int err;
befs_off_t pos = fblock << BEFS_SB(sb)->block_shift;
@@ -134,6 +134,7 @@ befs_read_lsymlink(struct super_block *sb, const 
befs_data_stream *ds,
befs_off_t bytes_read = 0;  /* bytes readed */
u16 plen;
struct buffer_head *bh;
+
befs_debug(sb, "---> %s length: %llu", __func__, len);
 
while (bytes_read < len) {
@@ -189,13 +190,13 @@ befs_count_blocks(struct super_block *sb, const 
befs_data_stream *ds)
metablocks += ds->indirect.len;
 
/*
-  Double indir block, plus all the indirect blocks it maps.
-  In the double-indirect range, all block runs of data are
-  BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know 
-  how many data block runs are in the double-indirect region,
-  and from that we know how many indirect blocks it takes to
-  map them. We assume that the indirect blocks are also
-  BEFS_DBLINDIR_BRUN_LEN blocks long.
+* Double indir block, plus all the indirect blocks it maps.
+* In the double-indirect range, all block runs of data are
+* BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know
+* how many data block runs are in the double-indirect region,
+* and from that we know how many indirect blocks it takes to
+* map them. We assume that the indirect blocks are also
+* BEFS_DBLINDIR_BRUN_LEN blocks long.
 */
if (ds->size > ds->max_indirect_range && ds->max_indirect_range != 0) {
uint dbl_bytes;
@@ -249,7 +250,7 @@ befs_count_blocks(struct super_block *sb, const 
befs_data_stream *ds)
  */
 static int
 befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data,
- befs_blocknr_t blockno, befs_block_run * run)
+ befs_blocknr_t blockno, befs_block_run *run)
 {
int i;
const befs_block_run *array = data->direct;
@@ -261,6 +262,7 @@ befs_find_brun_direct(struct super_block *sb, const 
befs_data_stream *data,
 sum += array[i].len, i++) {
if (blockno >= sum && blockno < sum + (array[i].len)) {
int offset = blockno - sum;
+
run->allocation_group = array[i].allocation_group;
run->start = array[i].start + offset;
run->len = array[i].len - offset;
@@ -304,7 +306,7 @@ static int
 befs_find_brun_indirect(struct super_block *sb,
   

[PATCH 3/3] befs: befs: fix style issues in datastream.c

2016-08-13 Thread Luis de Bethencourt
Fixing the following checkpatch.pl errors:

ERROR: "foo * bar" should be "foo *bar"
+befs_blocknr_t blockno, befs_block_run * run);

WARNING: Missing a blank line after declarations
+   struct buffer_head *bh;
+   befs_debug(sb, "---> %s length: %llu", __func__, len);

WARNING: Block comments use * on subsequent lines
+   /*
+  Double indir block, plus all the indirect blocks it maps.

(and other instances of these)

Signed-off-by: Luis de Bethencourt 
---
 fs/befs/datastream.c | 32 +---
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index 5ce85cf..b4c7ba0 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -22,17 +22,17 @@ const befs_inode_addr BAD_IADDR = { 0, 0, 0 };
 
 static int befs_find_brun_direct(struct super_block *sb,
 const befs_data_stream *data,
-befs_blocknr_t blockno, befs_block_run * run);
+befs_blocknr_t blockno, befs_block_run *run);
 
 static int befs_find_brun_indirect(struct super_block *sb,
   const befs_data_stream *data,
   befs_blocknr_t blockno,
-  befs_block_run * run);
+  befs_block_run *run);
 
 static int befs_find_brun_dblindirect(struct super_block *sb,
  const befs_data_stream *data,
  befs_blocknr_t blockno,
- befs_block_run * run);
+ befs_block_run *run);
 
 /**
  * befs_read_datastream - get buffer_head containing data, starting from pos.
@@ -46,7 +46,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb,
  */
 struct buffer_head *
 befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
-befs_off_t pos, uint * off)
+befs_off_t pos, uint *off)
 {
struct buffer_head *bh;
befs_block_run run;
@@ -94,7 +94,7 @@ befs_read_datastream(struct super_block *sb, const 
befs_data_stream *ds,
  */
 int
 befs_fblock2brun(struct super_block *sb, const befs_data_stream *data,
-befs_blocknr_t fblock, befs_block_run * run)
+befs_blocknr_t fblock, befs_block_run *run)
 {
int err;
befs_off_t pos = fblock << BEFS_SB(sb)->block_shift;
@@ -134,6 +134,7 @@ befs_read_lsymlink(struct super_block *sb, const 
befs_data_stream *ds,
befs_off_t bytes_read = 0;  /* bytes readed */
u16 plen;
struct buffer_head *bh;
+
befs_debug(sb, "---> %s length: %llu", __func__, len);
 
while (bytes_read < len) {
@@ -189,13 +190,13 @@ befs_count_blocks(struct super_block *sb, const 
befs_data_stream *ds)
metablocks += ds->indirect.len;
 
/*
-  Double indir block, plus all the indirect blocks it maps.
-  In the double-indirect range, all block runs of data are
-  BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know 
-  how many data block runs are in the double-indirect region,
-  and from that we know how many indirect blocks it takes to
-  map them. We assume that the indirect blocks are also
-  BEFS_DBLINDIR_BRUN_LEN blocks long.
+* Double indir block, plus all the indirect blocks it maps.
+* In the double-indirect range, all block runs of data are
+* BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know
+* how many data block runs are in the double-indirect region,
+* and from that we know how many indirect blocks it takes to
+* map them. We assume that the indirect blocks are also
+* BEFS_DBLINDIR_BRUN_LEN blocks long.
 */
if (ds->size > ds->max_indirect_range && ds->max_indirect_range != 0) {
uint dbl_bytes;
@@ -249,7 +250,7 @@ befs_count_blocks(struct super_block *sb, const 
befs_data_stream *ds)
  */
 static int
 befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data,
- befs_blocknr_t blockno, befs_block_run * run)
+ befs_blocknr_t blockno, befs_block_run *run)
 {
int i;
const befs_block_run *array = data->direct;
@@ -261,6 +262,7 @@ befs_find_brun_direct(struct super_block *sb, const 
befs_data_stream *data,
 sum += array[i].len, i++) {
if (blockno >= sum && blockno < sum + (array[i].len)) {
int offset = blockno - sum;
+
run->allocation_group = array[i].allocation_group;
run->start = array[i].start + offset;
run->len = array[i].len - offset;
@@ -304,7 +306,7 @@ static int
 befs_find_brun_indirect(struct super_block *sb,
const 

Re: [PATCH] sched: Avoid that __wait_on_bit_lock() hangs

2016-08-13 Thread Oleg Nesterov
Forgot to mention...

On 08/12, Bart Van Assche wrote:
>
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1643,7 +1643,12 @@ find_page:
>* wait_on_page_locked is used to avoid unnecessarily
>* serialisations and why it's safe.
>*/
> - wait_on_page_locked_killable(page);
> + error = wait_on_page_locked_killable(page);
> + if (error == -EINTR) {
> + put_page(page);
> + goto out;
> + }
> + error = 0;

This change probably makes sense regardless although I'd suggest to
simplify it:

-   wait_on_page_locked_killable(page);
+   error = wait_on_page_locked_killable(page);
+   if (unlikely(error))
+   goto readpage_error;


but it looks off-topic. And the changelog looks misleading/wrong.

I do not think this change makes sense in this debugging session,

Oleg.



Re: [PATCH] sched: Avoid that __wait_on_bit_lock() hangs

2016-08-13 Thread Oleg Nesterov
Forgot to mention...

On 08/12, Bart Van Assche wrote:
>
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1643,7 +1643,12 @@ find_page:
>* wait_on_page_locked is used to avoid unnecessarily
>* serialisations and why it's safe.
>*/
> - wait_on_page_locked_killable(page);
> + error = wait_on_page_locked_killable(page);
> + if (error == -EINTR) {
> + put_page(page);
> + goto out;
> + }
> + error = 0;

This change probably makes sense regardless although I'd suggest to
simplify it:

-   wait_on_page_locked_killable(page);
+   error = wait_on_page_locked_killable(page);
+   if (unlikely(error))
+   goto readpage_error;


but it looks off-topic. And the changelog looks misleading/wrong.

I do not think this change makes sense in this debugging session,

Oleg.



Re: [PATCH 2/2 v3] be2iscsi: Fix some error messages

2016-08-13 Thread Joe Perches
On Sat, 2016-08-13 at 09:41 -0700, Joe Perches wrote:
> On Sat, 2016-08-13 at 14:31 +0200, Christophe JAILLET wrote:
> > Le 13/08/2016 à 13:35, Joe Perches a écrit :
> > > > @@ -268,7 +268,7 @@ static int beiscsi_eh_abort(struct scsi_cmnd *sc)
> > > >     _cmd.dma);
> > > >     if (nonemb_cmd.va == NULL) {
> > > >     beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH,
> > > > -   "BM_%d : Failed to allocate memory for"
> > > > +   "BM_%d : Failed to allocate memory for "
> > > >     "mgmt_invalidate_icds\n");

This is the first time I've looked at the beiscsi_log macro.

It sure is odd and undesirable.

It's _very_ not nice to have a format string take an implied
__LINE__ argument.

It'd be much more intelligible to take the first bit as a
separate string, concatenate it in the macro with "_%d: "
and __LINE__ (if that's really useful, I think it's not)
and emit that as the format.

Something like:

diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h
index 30a4606..3f0fbbf 100644
--- a/drivers/scsi/be2iscsi/be_main.h
+++ b/drivers/scsi/be2iscsi/be_main.h
@@ -1084,11 +1084,12 @@ struct hwi_context_memory {
 #define __beiscsi_log(phba, level, fmt, arg...) \
    shost_printk(level, phba->shost, fmt, __LINE__, ##arg)
 
-#define beiscsi_log(phba, level, mask, fmt, arg...) \
-do { \
-   uint32_t log_value = phba->attr_log_enable; \
-   if (((mask) & log_value) || (level[1] <= '3')) \
-   __beiscsi_log(phba, level, fmt, ##arg); \
-} while (0);
+#define beiscsi_log(phba, level, mask, prefix, fmt, ...)   \
+do {   \
+   uint32_t log_value = phba->attr_log_enable; \
+   if (((mask) & log_value) || (level[1] <= '3'))  \
+   __beiscsi_log(phba, level, prefix "_%d: " fmt,  \
+     ##__VA_ARGS__);   \
+} while (0)
 
 #endif

So these beiscsi_log uses become something like:

beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH,
"BM", "Failed to allocate memory for mgmt_invalidate_icds\n");

and the format and its arguments match.



Re: [PATCH 2/2 v3] be2iscsi: Fix some error messages

2016-08-13 Thread Joe Perches
On Sat, 2016-08-13 at 09:41 -0700, Joe Perches wrote:
> On Sat, 2016-08-13 at 14:31 +0200, Christophe JAILLET wrote:
> > Le 13/08/2016 à 13:35, Joe Perches a écrit :
> > > > @@ -268,7 +268,7 @@ static int beiscsi_eh_abort(struct scsi_cmnd *sc)
> > > >     _cmd.dma);
> > > >     if (nonemb_cmd.va == NULL) {
> > > >     beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH,
> > > > -   "BM_%d : Failed to allocate memory for"
> > > > +   "BM_%d : Failed to allocate memory for "
> > > >     "mgmt_invalidate_icds\n");

This is the first time I've looked at the beiscsi_log macro.

It sure is odd and undesirable.

It's _very_ not nice to have a format string take an implied
__LINE__ argument.

It'd be much more intelligible to take the first bit as a
separate string, concatenate it in the macro with "_%d: "
and __LINE__ (if that's really useful, I think it's not)
and emit that as the format.

Something like:

diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h
index 30a4606..3f0fbbf 100644
--- a/drivers/scsi/be2iscsi/be_main.h
+++ b/drivers/scsi/be2iscsi/be_main.h
@@ -1084,11 +1084,12 @@ struct hwi_context_memory {
 #define __beiscsi_log(phba, level, fmt, arg...) \
    shost_printk(level, phba->shost, fmt, __LINE__, ##arg)
 
-#define beiscsi_log(phba, level, mask, fmt, arg...) \
-do { \
-   uint32_t log_value = phba->attr_log_enable; \
-   if (((mask) & log_value) || (level[1] <= '3')) \
-   __beiscsi_log(phba, level, fmt, ##arg); \
-} while (0);
+#define beiscsi_log(phba, level, mask, prefix, fmt, ...)   \
+do {   \
+   uint32_t log_value = phba->attr_log_enable; \
+   if (((mask) & log_value) || (level[1] <= '3'))  \
+   __beiscsi_log(phba, level, prefix "_%d: " fmt,  \
+     ##__VA_ARGS__);   \
+} while (0)
 
 #endif

So these beiscsi_log uses become something like:

beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH,
"BM", "Failed to allocate memory for mgmt_invalidate_icds\n");

and the format and its arguments match.



[PATCH] android: binder: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
The workqueue is being used to run deferred work for the android binder.

The "binder_deferred_workqueue" queues only a single work item and hence
does not require ordering. Also, this workqueue is not being used on a
memory reclaim path. Hence, the singlethreaded workqueue has been
replaced with the use of system_wq.

System workqueues have been able to handle high level of concurrency
for a long time now and hence it's not required to have a singlethreaded
workqueue just to gain concurrency. Unlike a dedicated per-cpu workqueue
created with create_singlethread_workqueue(), system_wq allows multiple
work items to overlap executions even on the same CPU; however, a
per-cpu workqueue doesn't have any CPU locality or global ordering
guarantee unless the target CPU is explicitly specified and thus the
increase of local concurrency shouldn't make any difference.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/android/binder.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 16288e7..562af94 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -59,7 +59,6 @@ static struct dentry *binder_debugfs_dir_entry_proc;
 static struct binder_node *binder_context_mgr_node;
 static kuid_t binder_context_mgr_uid = INVALID_UID;
 static int binder_last_id;
-static struct workqueue_struct *binder_deferred_workqueue;

 #define BINDER_DEBUG_ENTRY(name) \
 static int binder_##name##_open(struct inode *inode, struct file *file) \
@@ -3227,7 +3226,7 @@ binder_defer_work(struct binder_proc *proc, enum 
binder_deferred_state defer)
if (hlist_unhashed(&proc->deferred_work_node)) {
hlist_add_head(&proc->deferred_work_node,
&binder_deferred_list);
-   queue_work(binder_deferred_workqueue, &binder_deferred_work);
+   schedule_work(&binder_deferred_work);
}
mutex_unlock(&binder_deferred_lock);
 }
@@ -3679,10 +3678,6 @@ static int __init binder_init(void)
 {
int ret;

-   binder_deferred_workqueue = create_singlethread_workqueue("binder");
-   if (!binder_deferred_workqueue)
-   return -ENOMEM;
-
binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL);
if (binder_debugfs_dir_entry_root)
binder_debugfs_dir_entry_proc = debugfs_create_dir("proc",
--
2.1.4



[PATCH] android: binder: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
The workqueue is being used to run deferred work for the android binder.

The "binder_deferred_workqueue" queues only a single work item and hence
does not require ordering. Also, this workqueue is not being used on a
memory reclaim path. Hence, the singlethreaded workqueue has been
replaced with the use of system_wq.

System workqueues have been able to handle high level of concurrency
for a long time now and hence it's not required to have a singlethreaded
workqueue just to gain concurrency. Unlike a dedicated per-cpu workqueue
created with create_singlethread_workqueue(), system_wq allows multiple
work items to overlap executions even on the same CPU; however, a
per-cpu workqueue doesn't have any CPU locality or global ordering
guarantee unless the target CPU is explicitly specified and thus the
increase of local concurrency shouldn't make any difference.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/android/binder.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 16288e7..562af94 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -59,7 +59,6 @@ static struct dentry *binder_debugfs_dir_entry_proc;
 static struct binder_node *binder_context_mgr_node;
 static kuid_t binder_context_mgr_uid = INVALID_UID;
 static int binder_last_id;
-static struct workqueue_struct *binder_deferred_workqueue;

 #define BINDER_DEBUG_ENTRY(name) \
 static int binder_##name##_open(struct inode *inode, struct file *file) \
@@ -3227,7 +3226,7 @@ binder_defer_work(struct binder_proc *proc, enum 
binder_deferred_state defer)
if (hlist_unhashed(&proc->deferred_work_node)) {
hlist_add_head(&proc->deferred_work_node,
&binder_deferred_list);
-   queue_work(binder_deferred_workqueue, &binder_deferred_work);
+   schedule_work(&binder_deferred_work);
}
mutex_unlock(&binder_deferred_lock);
 }
@@ -3679,10 +3678,6 @@ static int __init binder_init(void)
 {
int ret;

-   binder_deferred_workqueue = create_singlethread_workqueue("binder");
-   if (!binder_deferred_workqueue)
-   return -ENOMEM;
-
binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL);
if (binder_debugfs_dir_entry_root)
binder_debugfs_dir_entry_proc = debugfs_create_dir("proc",
--
2.1.4



Re: [PATCH 2/2 v3] be2iscsi: Fix some error messages

2016-08-13 Thread Joe Perches
On Sat, 2016-08-13 at 14:31 +0200, Christophe JAILLET wrote:
> Le 13/08/2016 à 13:35, Joe Perches a écrit :
> > > @@ -268,7 +268,7 @@ static int beiscsi_eh_abort(struct scsi_cmnd *sc)
> > >   _cmd.dma);
> > >   if (nonemb_cmd.va == NULL) {
> > >   beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH,
> > > - "BM_%d : Failed to allocate memory for"
> > > + "BM_%d : Failed to allocate memory for "
> > >   "mgmt_invalidate_icds\n");
> > doesn't match commit log as no coalescing/concatenation
> > is done.
> > 
> > There are many of these.
> > 
> I have *only* fixed the one reported by checkpatch and left the others 
> unchanged.
> 
> My initial proposal was to fix incorrect strings, without modifying too 
> much the code. So I decided to do the minimum of changes.
> 
> Should I resubmitted with:
> - all strings *in the patch* concatenated?
> - all strings *in the file*" concatenated?

Hello Christophe

You don't _have_ to do anything.

I think the commit message is misleading.

You could submit another patch that does
the equivalent of:

$ ./scripts/checkpatch.pl --types=SPLIT_STRING --fix-inplace drivers/scsi/be2iscsi/be_main.c

with the appropriate commit message



Re: [PATCH 2/2 v3] be2iscsi: Fix some error messages

2016-08-13 Thread Joe Perches
On Sat, 2016-08-13 at 14:31 +0200, Christophe JAILLET wrote:
> Le 13/08/2016 à 13:35, Joe Perches a écrit :
> > > @@ -268,7 +268,7 @@ static int beiscsi_eh_abort(struct scsi_cmnd *sc)
> > >   _cmd.dma);
> > >   if (nonemb_cmd.va == NULL) {
> > >   beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH,
> > > - "BM_%d : Failed to allocate memory for"
> > > + "BM_%d : Failed to allocate memory for "
> > >   "mgmt_invalidate_icds\n");
> > doesn't match commit log as no coalescing/concatenation
> > is done.
> > 
> > There are many of these.
> > 
> I have *only* fixed the one reported by checkpatch and left the others 
> unchanged.
> 
> My initial proposal was to fix incorrect strings, without modifying too 
> much the code. So I decided to do the minimum of changes.
> 
> Should I resubmitted with:
> - all strings *in the patch* concatenated?
> - all strings *in the file*" concatenated?

Hello Christophe

You don't _have_ to do anything.

I think the commit message is misleading.

You could submit another patch that does
the equivalent of:

$ ./scripts/checkpatch.pl --types=SPLIT_STRING --fix-inplace drivers/scsi/be2iscsi/be_main.c

with the appropriate commit message



[PATCH] edac: wq: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

wq has been identity converted since it is used to detect things like
ECC memory errors.  It's not recommended to stall it from memory pressure.
Hence, WQ_MEM_RECLAIM has been set to ensure forward progress under
memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/edac/wq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/wq.c b/drivers/edac/wq.c
index 1b8c07e..2a9a11a 100644
--- a/drivers/edac/wq.c
+++ b/drivers/edac/wq.c
@@ -27,7 +27,7 @@ EXPORT_SYMBOL_GPL(edac_stop_work);

 int edac_workqueue_setup(void)
 {
-   wq = create_singlethread_workqueue("edac-poller");
+   wq = alloc_ordered_workqueue("edac-poller", WQ_MEM_RECLAIM);
if (!wq)
return -ENODEV;
else
--
2.1.4



[PATCH] edac: wq: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

wq has been identity converted since it is used to detect things like
ECC memory errors.  It's not recommended to stall it from memory pressure.
Hence, WQ_MEM_RECLAIM has been set to ensure forward progress under
memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/edac/wq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/wq.c b/drivers/edac/wq.c
index 1b8c07e..2a9a11a 100644
--- a/drivers/edac/wq.c
+++ b/drivers/edac/wq.c
@@ -27,7 +27,7 @@ EXPORT_SYMBOL_GPL(edac_stop_work);

 int edac_workqueue_setup(void)
 {
-   wq = create_singlethread_workqueue("edac-poller");
+   wq = alloc_ordered_workqueue("edac-poller", WQ_MEM_RECLAIM);
if (!wq)
return -ENODEV;
else
--
2.1.4



[PATCH v3 6/7] x86: Fix thread_saved_pc()

2016-08-13 Thread Brian Gerst
thread_saved_pc() was using a completely bogus method to get the return
address.  Since switch_to() was previously inlined, there was no sane way
to know where on the stack the return address was stored.  Now with the
frame of a sleeping thread well defined, this can be implemented correctly.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/processor.h | 10 ++
 arch/x86/kernel/process.c| 11 +++
 arch/x86/kernel/process_32.c |  8 
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6fee863..b22fb5a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -721,8 +721,6 @@ static inline void spin_lock_prefetch(const void *x)
.addr_limit = KERNEL_DS,  \
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 /*
  * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
  * This is necessary to guarantee that the entire "struct pt_regs"
@@ -773,17 +771,13 @@ extern unsigned long thread_saved_pc(struct task_struct 
*tsk);
.addr_limit = KERNEL_DS,\
 }
 
-/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long 
*)((t)->thread.sp - 8))
-
 #define task_pt_regs(tsk)  ((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
 #endif /* CONFIG_X86_64 */
 
+extern unsigned long thread_saved_pc(struct task_struct *tsk);
+
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
   unsigned long new_sp);
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0115a4a..c1fa790 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -514,6 +514,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 }
 
 /*
+ * Return saved PC of a blocked thread.
+ * What is this good for? it will be always the scheduler or ret_from_fork.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+   struct inactive_task_frame *frame =
+   (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
+   return READ_ONCE_NOCHECK(frame->ret_addr);
+}
+
+/*
  * Called from fs/proc with a reference on @p to find the function
  * which called into schedule(). This needs to be done carefully
  * because the task might wake up and we might look at a stack
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 18714a1..404efdf 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,14 +55,6 @@
 #include 
 #include 
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-   return ((unsigned long *)tsk->thread.sp)[3];
-}
-
 void __show_regs(struct pt_regs *regs, int all)
 {
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
-- 
2.5.5



[PATCH v3 6/7] x86: Fix thread_saved_pc()

2016-08-13 Thread Brian Gerst
thread_saved_pc() was using a completely bogus method to get the return
address.  Since switch_to() was previously inlined, there was no sane way
to know where on the stack the return address was stored.  Now with the
frame of a sleeping thread well defined, this can be implemented correctly.

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/processor.h | 10 ++
 arch/x86/kernel/process.c| 11 +++
 arch/x86/kernel/process_32.c |  8 
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6fee863..b22fb5a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -721,8 +721,6 @@ static inline void spin_lock_prefetch(const void *x)
.addr_limit = KERNEL_DS,  \
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 /*
  * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
  * This is necessary to guarantee that the entire "struct pt_regs"
@@ -773,17 +771,13 @@ extern unsigned long thread_saved_pc(struct task_struct 
*tsk);
.addr_limit = KERNEL_DS,\
 }
 
-/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long 
*)((t)->thread.sp - 8))
-
 #define task_pt_regs(tsk)  ((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
 #endif /* CONFIG_X86_64 */
 
+extern unsigned long thread_saved_pc(struct task_struct *tsk);
+
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
   unsigned long new_sp);
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0115a4a..c1fa790 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -514,6 +514,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 }
 
 /*
+ * Return saved PC of a blocked thread.
+ * What is this good for? it will be always the scheduler or ret_from_fork.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+   struct inactive_task_frame *frame =
+   (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
+   return READ_ONCE_NOCHECK(frame->ret_addr);
+}
+
+/*
  * Called from fs/proc with a reference on @p to find the function
  * which called into schedule(). This needs to be done carefully
  * because the task might wake up and we might look at a stack
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 18714a1..404efdf 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,14 +55,6 @@
 #include 
 #include 
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-   return ((unsigned long *)tsk->thread.sp)[3];
-}
-
 void __show_regs(struct pt_regs *regs, int all)
 {
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
-- 
2.5.5



[PATCH v3 3/7] x86: Add struct inactive_task_frame

2016-08-13 Thread Brian Gerst
Add struct inactive_task_frame, which defines the layout of the stack for
a sleeping process.  For now, the only defined field is the BP register
(frame pointer).

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/stacktrace.h | 4 ++--
 arch/x86/include/asm/switch_to.h  | 5 +
 arch/x86/kernel/kgdb.c| 3 ++-
 arch/x86/kernel/process.c | 3 ++-
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/stacktrace.h 
b/arch/x86/include/asm/stacktrace.h
index 0944218..7646fb2 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 extern int kstack_depth_to_print;
 
@@ -70,8 +71,7 @@ stack_frame(struct task_struct *task, struct pt_regs *regs)
return bp;
}
 
-   /* bp is the last reg pushed by switch_to */
-   return *(unsigned long *)task->thread.sp;
+   return ((struct inactive_task_frame *)task->thread.sp)->bp;
 }
 #else
 static inline unsigned long
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8f321a1..02de86e 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -8,6 +8,11 @@ struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
  struct tss_struct *tss);
 
+/* data that is pointed to by thread.sp */
+struct inactive_task_frame {
+   unsigned long bp;
+};
+
 #ifdef CONFIG_X86_32
 
 #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 5e3f294..8e36f24 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -50,6 +50,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
 {
@@ -166,7 +167,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, 
struct task_struct *p)
gdb_regs[GDB_DX]= 0;
gdb_regs[GDB_SI]= 0;
gdb_regs[GDB_DI]= 0;
-   gdb_regs[GDB_BP]= *(unsigned long *)p->thread.sp;
+   gdb_regs[GDB_BP]= ((struct inactive_task_frame 
*)p->thread.sp)->bp;
 #ifdef CONFIG_X86_32
gdb_regs[GDB_DS]= __KERNEL_DS;
gdb_regs[GDB_ES]= __KERNEL_DS;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..0115a4a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -556,7 +557,7 @@ unsigned long get_wchan(struct task_struct *p)
if (sp < bottom || sp > top)
return 0;
 
-   fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+   fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
do {
if (fp < bottom || fp > top)
return 0;
-- 
2.5.5



[PATCH v3 4/7] x86: Rewrite switch_to() code

2016-08-13 Thread Brian Gerst
Move the low-level context switch code to an out-of-line asm stub instead of
using complex inline asm.  This allows constructing a new stack frame for the
child process to make it seamlessly flow to ret_from_fork without an extra
test and branch in __switch_to().  It also improves code generation for
__schedule() by using the C calling convention instead of clobbering all
registers.

Signed-off-by: Brian Gerst 
---
 arch/x86/entry/entry_32.S  |  37 ++
 arch/x86/entry/entry_64.S  |  41 ++-
 arch/x86/include/asm/processor.h   |   3 -
 arch/x86/include/asm/switch_to.h   | 137 ++---
 arch/x86/include/asm/thread_info.h |   2 -
 arch/x86/kernel/asm-offsets.c  |   6 ++
 arch/x86/kernel/asm-offsets_32.c   |   5 ++
 arch/x86/kernel/asm-offsets_64.c   |   5 ++
 arch/x86/kernel/process_32.c   |   9 ++-
 arch/x86/kernel/process_64.c   |   9 ++-
 arch/x86/kernel/smpboot.c  |   1 -
 11 files changed, 125 insertions(+), 130 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 0b5..bf8f221 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -204,6 +204,43 @@
POP_GS_EX
 .endm
 
+/*
+ * %eax: prev task
+ * %edx: next task
+ */
+ENTRY(__switch_to_asm)
+   /*
+* Save callee-saved registers
+* This must match the order in struct inactive_task_frame
+*/
+   pushl   %ebp
+   pushl   %ebx
+   pushl   %edi
+   pushl   %esi
+
+   /* switch stack */
+   movl%esp, TASK_threadsp(%eax)
+   movlTASK_threadsp(%edx), %esp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+   movlTASK_stack_canary(%edx), %ebx
+   movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+#endif
+
+   /* restore callee-saved registers */
+   popl%esi
+   popl%edi
+   popl%ebx
+   popl%ebp
+
+   jmp __switch_to
+END(__switch_to_asm)
+
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * eax: prev task we switched from
+ */
 ENTRY(ret_from_fork)
pushl   %eax
callschedule_tail
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f6b40e5..c1af8ac 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -368,13 +368,48 @@ END(ptregs_\func)
 #include 
 
 /*
+ * %rdi: prev task
+ * %rsi: next task
+ */
+ENTRY(__switch_to_asm)
+   /*
+* Save callee-saved registers
+* This must match the order in inactive_task_frame
+*/
+   pushq   %rbp
+   pushq   %rbx
+   pushq   %r12
+   pushq   %r13
+   pushq   %r14
+   pushq   %r15
+
+   /* switch stack */
+   movq%rsp, TASK_threadsp(%rdi)
+   movqTASK_threadsp(%rsi), %rsp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+   movqTASK_stack_canary(%rsi), %rbx
+   movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+#endif
+
+   /* restore callee-saved registers */
+   popq%r15
+   popq%r14
+   popq%r13
+   popq%r12
+   popq%rbx
+   popq%rbp
+
+   jmp __switch_to
+END(__switch_to_asm)
+
+/*
  * A newly forked process directly context switches into this address.
  *
- * rdi: prev task we switched from
+ * rax: prev task we switched from
  */
 ENTRY(ret_from_fork)
-   LOCK ; btr $TIF_FORK, TI_flags(%r8)
-
+   movq%rax, %rdi
callschedule_tail   /* rdi: 'prev' task parameter */
 
testb   $3, CS(%rsp)/* from kernel_thread? */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 63def95..6fee863 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -389,9 +389,6 @@ struct thread_struct {
unsigned short  fsindex;
unsigned short  gsindex;
 #endif
-#ifdef CONFIG_X86_32
-   unsigned long   ip;
-#endif
 #ifdef CONFIG_X86_64
unsigned long   fsbase;
unsigned long   gsbase;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 02de86e..bf4e2ec 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,135 +2,40 @@
 #define _ASM_X86_SWITCH_TO_H
 
 struct task_struct; /* one of the stranger aspects of C forward declarations */
+
+struct task_struct *__switch_to_asm(struct task_struct *prev,
+   struct task_struct *next);
+
 __visible struct task_struct *__switch_to(struct task_struct *prev,
-  struct task_struct *next);
+ struct task_struct *next);
 struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
  struct tss_struct *tss);
 
 /* data that is pointed to by thread.sp */
 struct inactive_task_frame {

[PATCH v3 5/7] x86: Pass kernel thread parameters in fork_frame

2016-08-13 Thread Brian Gerst
Instead of setting up a fake pt_regs context, put the kernel thread
function pointer and arg into the unused callee-restored registers
of struct fork_frame.

Signed-off-by: Brian Gerst 
---
 arch/x86/entry/entry_32.S| 31 +++
 arch/x86/entry/entry_64.S| 37 +
 arch/x86/include/asm/switch_to.h |  2 ++
 arch/x86/kernel/process_32.c | 18 --
 arch/x86/kernel/process_64.c | 12 +++-
 5 files changed, 41 insertions(+), 59 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index bf8f221..b75a8bc 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -240,35 +240,34 @@ END(__switch_to_asm)
  * A newly forked process directly context switches into this address.
  *
  * eax: prev task we switched from
+ * ebx: kernel thread func (NULL for user thread)
+ * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
pushl   %eax
callschedule_tail
popl%eax
 
+   testl   %ebx, %ebx
+   jnz 1f  /* kernel threads are uncommon */
+
+2:
/* When we fork, we trace the syscall return in the child, too. */
movl%esp, %eax
callsyscall_return_slowpath
jmp restore_all
-END(ret_from_fork)
-
-ENTRY(ret_from_kernel_thread)
-   pushl   %eax
-   callschedule_tail
-   popl%eax
-   movlPT_EBP(%esp), %eax
-   call*PT_EBX(%esp)
-   movl$0, PT_EAX(%esp)
 
+   /* kernel thread */
+1: movl%edi, %eax
+   call*%ebx
/*
-* Kernel threads return to userspace as if returning from a syscall.
-* We should check whether anything actually uses this path and, if so,
-* consider switching it over to ret_from_fork.
+* A kernel thread is allowed to return here after successfully
+* calling do_execve().  Exit to userspace to complete the execve()
+* syscall.
 */
-   movl%esp, %eax
-   callsyscall_return_slowpath
-   jmp restore_all
-ENDPROC(ret_from_kernel_thread)
+   movl$0, PT_EAX(%esp)
+   jmp 2b
+END(ret_from_fork)
 
 /*
  * Return to user mode is not as complex as all this looks,
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c1af8ac..c0373d6 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -407,37 +407,34 @@ END(__switch_to_asm)
  * A newly forked process directly context switches into this address.
  *
  * rax: prev task we switched from
+ * rbx: kernel thread func (NULL for user thread)
+ * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
movq%rax, %rdi
callschedule_tail   /* rdi: 'prev' task parameter */
 
-   testb   $3, CS(%rsp)/* from kernel_thread? */
-   jnz 1f
-
-   /*
-* We came from kernel_thread.  This code path is quite twisted, and
-* someone should clean it up.
-*
-* copy_thread_tls stashes the function pointer in RBX and the
-* parameter to be passed in RBP.  The called function is permitted
-* to call do_execve and thereby jump to user mode.
-*/
-   movqRBP(%rsp), %rdi
-   call*RBX(%rsp)
-   movl$0, RAX(%rsp)
-
-   /*
-* Fall through as though we're exiting a syscall.  This makes a
-* twisted sort of sense if we just called do_execve.
-*/
+   testq   %rbx, %rbx  /* from kernel_thread? */
+   jnz 1f  /* kernel threads are uncommon 
*/
 
-1:
+2:
movq%rsp, %rdi
callsyscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON   /* user mode is traced as IRQS on */
SWAPGS
jmp restore_regs_and_iret
+
+1:
+   /* kernel thread */
+   movq%r12, %rdi
+   call*%rbx
+   /*
+* A kernel thread is allowed to return here after successfully
+* calling do_execve().  Exit to userspace to complete the execve()
+* syscall.
+*/
+   movq$0, RAX(%rsp)
+   jmp 2b
 END(ret_from_fork)
 
 /*
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index bf4e2ec..33fb765 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -12,6 +12,8 @@ struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
  struct tss_struct *tss);
 
+asmlinkage void ret_from_fork(void);
+
 /* data that is pointed to by thread.sp */
 struct inactive_task_frame {
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4bedbc0..18714a1 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,9 +55,6 @@
 #include 
 #include 
 
-asmlinkage 

[PATCH v3 7/7] Revert "sched: Mark __schedule() stack frame as non-standard"

2016-08-13 Thread Brian Gerst
Now that the x86 switch_to() uses the standard C calling convention,
STACK_FRAME_NON_STANDARD is no longer needed.

Suggested-by: Josh Poimboeuf 
Signed-off-by: Brian Gerst 
---
 kernel/sched/core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3b6b23c..dbf73db 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3384,7 +3384,6 @@ static void __sched notrace __schedule(bool preempt)
 
balance_callback(rq);
 }
-STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
 
 static inline void sched_submit_work(struct task_struct *tsk)
 {
-- 
2.5.5



[PATCH v3 3/7] x86: Add struct inactive_task_frame

2016-08-13 Thread Brian Gerst
Add struct inactive_task_frame, which defines the layout of the stack for
a sleeping process.  For now, the only defined field is the BP register
(frame pointer).

Signed-off-by: Brian Gerst 
---
 arch/x86/include/asm/stacktrace.h | 4 ++--
 arch/x86/include/asm/switch_to.h  | 5 +
 arch/x86/kernel/kgdb.c| 3 ++-
 arch/x86/kernel/process.c | 3 ++-
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/stacktrace.h 
b/arch/x86/include/asm/stacktrace.h
index 0944218..7646fb2 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 extern int kstack_depth_to_print;
 
@@ -70,8 +71,7 @@ stack_frame(struct task_struct *task, struct pt_regs *regs)
return bp;
}
 
-   /* bp is the last reg pushed by switch_to */
-   return *(unsigned long *)task->thread.sp;
+   return ((struct inactive_task_frame *)task->thread.sp)->bp;
 }
 #else
 static inline unsigned long
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8f321a1..02de86e 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -8,6 +8,11 @@ struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
  struct tss_struct *tss);
 
+/* data that is pointed to by thread.sp */
+struct inactive_task_frame {
+   unsigned long bp;
+};
+
 #ifdef CONFIG_X86_32
 
 #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 5e3f294..8e36f24 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -50,6 +50,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
 {
@@ -166,7 +167,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, 
struct task_struct *p)
gdb_regs[GDB_DX]= 0;
gdb_regs[GDB_SI]= 0;
gdb_regs[GDB_DI]= 0;
-   gdb_regs[GDB_BP]= *(unsigned long *)p->thread.sp;
+   gdb_regs[GDB_BP]= ((struct inactive_task_frame 
*)p->thread.sp)->bp;
 #ifdef CONFIG_X86_32
gdb_regs[GDB_DS]= __KERNEL_DS;
gdb_regs[GDB_ES]= __KERNEL_DS;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..0115a4a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -556,7 +557,7 @@ unsigned long get_wchan(struct task_struct *p)
if (sp < bottom || sp > top)
return 0;
 
-   fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+   fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
do {
if (fp < bottom || fp > top)
return 0;
-- 
2.5.5



[PATCH v3 4/7] x86: Rewrite switch_to() code

2016-08-13 Thread Brian Gerst
Move the low-level context switch code to an out-of-line asm stub instead of
using complex inline asm.  This allows constructing a new stack frame for the
child process to make it seamlessly flow to ret_from_fork without an extra
test and branch in __switch_to().  It also improves code generation for
__schedule() by using the C calling convention instead of clobbering all
registers.

Signed-off-by: Brian Gerst 
---
 arch/x86/entry/entry_32.S  |  37 ++
 arch/x86/entry/entry_64.S  |  41 ++-
 arch/x86/include/asm/processor.h   |   3 -
 arch/x86/include/asm/switch_to.h   | 137 ++---
 arch/x86/include/asm/thread_info.h |   2 -
 arch/x86/kernel/asm-offsets.c  |   6 ++
 arch/x86/kernel/asm-offsets_32.c   |   5 ++
 arch/x86/kernel/asm-offsets_64.c   |   5 ++
 arch/x86/kernel/process_32.c   |   9 ++-
 arch/x86/kernel/process_64.c   |   9 ++-
 arch/x86/kernel/smpboot.c  |   1 -
 11 files changed, 125 insertions(+), 130 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 0b5..bf8f221 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -204,6 +204,43 @@
POP_GS_EX
 .endm
 
+/*
+ * %eax: prev task
+ * %edx: next task
+ */
+ENTRY(__switch_to_asm)
+   /*
+* Save callee-saved registers
+* This must match the order in struct inactive_task_frame
+*/
+   pushl   %ebp
+   pushl   %ebx
+   pushl   %edi
+   pushl   %esi
+
+   /* switch stack */
+   movl%esp, TASK_threadsp(%eax)
+   movlTASK_threadsp(%edx), %esp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+   movlTASK_stack_canary(%edx), %ebx
+   movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+#endif
+
+   /* restore callee-saved registers */
+   popl%esi
+   popl%edi
+   popl%ebx
+   popl%ebp
+
+   jmp __switch_to
+END(__switch_to_asm)
+
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * eax: prev task we switched from
+ */
 ENTRY(ret_from_fork)
pushl   %eax
callschedule_tail
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f6b40e5..c1af8ac 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -368,13 +368,48 @@ END(ptregs_\func)
 #include 
 
 /*
+ * %rdi: prev task
+ * %rsi: next task
+ */
+ENTRY(__switch_to_asm)
+   /*
+* Save callee-saved registers
+* This must match the order in inactive_task_frame
+*/
+   pushq   %rbp
+   pushq   %rbx
+   pushq   %r12
+   pushq   %r13
+   pushq   %r14
+   pushq   %r15
+
+   /* switch stack */
+   movq%rsp, TASK_threadsp(%rdi)
+   movqTASK_threadsp(%rsi), %rsp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+   movqTASK_stack_canary(%rsi), %rbx
+   movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+#endif
+
+   /* restore callee-saved registers */
+   popq%r15
+   popq%r14
+   popq%r13
+   popq%r12
+   popq%rbx
+   popq%rbp
+
+   jmp __switch_to
+END(__switch_to_asm)
+
+/*
  * A newly forked process directly context switches into this address.
  *
- * rdi: prev task we switched from
+ * rax: prev task we switched from
  */
 ENTRY(ret_from_fork)
-   LOCK ; btr $TIF_FORK, TI_flags(%r8)
-
+   movq%rax, %rdi
callschedule_tail   /* rdi: 'prev' task parameter */
 
testb   $3, CS(%rsp)/* from kernel_thread? */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 63def95..6fee863 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -389,9 +389,6 @@ struct thread_struct {
unsigned short  fsindex;
unsigned short  gsindex;
 #endif
-#ifdef CONFIG_X86_32
-   unsigned long   ip;
-#endif
 #ifdef CONFIG_X86_64
unsigned long   fsbase;
unsigned long   gsbase;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 02de86e..bf4e2ec 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,135 +2,40 @@
 #define _ASM_X86_SWITCH_TO_H
 
 struct task_struct; /* one of the stranger aspects of C forward declarations */
+
+struct task_struct *__switch_to_asm(struct task_struct *prev,
+   struct task_struct *next);
+
 __visible struct task_struct *__switch_to(struct task_struct *prev,
-  struct task_struct *next);
+ struct task_struct *next);
 struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
  struct tss_struct *tss);
 
 /* data that is pointed to by thread.sp */
 struct inactive_task_frame {
+#ifdef 

[PATCH v3 5/7] x86: Pass kernel thread parameters in fork_frame

2016-08-13 Thread Brian Gerst
Instead of setting up a fake pt_regs context, put the kernel thread
function pointer and arg into the unused callee-restored registers
of struct fork_frame.

Signed-off-by: Brian Gerst 
---
 arch/x86/entry/entry_32.S| 31 +++
 arch/x86/entry/entry_64.S| 37 +
 arch/x86/include/asm/switch_to.h |  2 ++
 arch/x86/kernel/process_32.c | 18 --
 arch/x86/kernel/process_64.c | 12 +++-
 5 files changed, 41 insertions(+), 59 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index bf8f221..b75a8bc 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -240,35 +240,34 @@ END(__switch_to_asm)
  * A newly forked process directly context switches into this address.
  *
  * eax: prev task we switched from
+ * ebx: kernel thread func (NULL for user thread)
+ * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
pushl   %eax
callschedule_tail
popl%eax
 
+   testl   %ebx, %ebx
+   jnz 1f  /* kernel threads are uncommon */
+
+2:
/* When we fork, we trace the syscall return in the child, too. */
movl%esp, %eax
callsyscall_return_slowpath
jmp restore_all
-END(ret_from_fork)
-
-ENTRY(ret_from_kernel_thread)
-   pushl   %eax
-   callschedule_tail
-   popl%eax
-   movlPT_EBP(%esp), %eax
-   call*PT_EBX(%esp)
-   movl$0, PT_EAX(%esp)
 
+   /* kernel thread */
+1: movl%edi, %eax
+   call*%ebx
/*
-* Kernel threads return to userspace as if returning from a syscall.
-* We should check whether anything actually uses this path and, if so,
-* consider switching it over to ret_from_fork.
+* A kernel thread is allowed to return here after successfully
+* calling do_execve().  Exit to userspace to complete the execve()
+* syscall.
 */
-   movl%esp, %eax
-   callsyscall_return_slowpath
-   jmp restore_all
-ENDPROC(ret_from_kernel_thread)
+   movl$0, PT_EAX(%esp)
+   jmp 2b
+END(ret_from_fork)
 
 /*
  * Return to user mode is not as complex as all this looks,
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c1af8ac..c0373d6 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -407,37 +407,34 @@ END(__switch_to_asm)
  * A newly forked process directly context switches into this address.
  *
  * rax: prev task we switched from
+ * rbx: kernel thread func (NULL for user thread)
+ * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
movq%rax, %rdi
callschedule_tail   /* rdi: 'prev' task parameter */
 
-   testb   $3, CS(%rsp)/* from kernel_thread? */
-   jnz 1f
-
-   /*
-* We came from kernel_thread.  This code path is quite twisted, and
-* someone should clean it up.
-*
-* copy_thread_tls stashes the function pointer in RBX and the
-* parameter to be passed in RBP.  The called function is permitted
-* to call do_execve and thereby jump to user mode.
-*/
-   movqRBP(%rsp), %rdi
-   call*RBX(%rsp)
-   movl$0, RAX(%rsp)
-
-   /*
-* Fall through as though we're exiting a syscall.  This makes a
-* twisted sort of sense if we just called do_execve.
-*/
+   testq   %rbx, %rbx  /* from kernel_thread? */
+   jnz 1f  /* kernel threads are uncommon 
*/
 
-1:
+2:
movq%rsp, %rdi
callsyscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON   /* user mode is traced as IRQS on */
SWAPGS
jmp restore_regs_and_iret
+
+1:
+   /* kernel thread */
+   movq%r12, %rdi
+   call*%rbx
+   /*
+* A kernel thread is allowed to return here after successfully
+* calling do_execve().  Exit to userspace to complete the execve()
+* syscall.
+*/
+   movq$0, RAX(%rsp)
+   jmp 2b
 END(ret_from_fork)
 
 /*
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index bf4e2ec..33fb765 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -12,6 +12,8 @@ struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
  struct tss_struct *tss);
 
+asmlinkage void ret_from_fork(void);
+
 /* data that is pointed to by thread.sp */
 struct inactive_task_frame {
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4bedbc0..18714a1 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,9 +55,6 @@
 #include 
 #include 
 
-asmlinkage void 

[PATCH v3 7/7] Revert "sched: Mark __schedule() stack frame as non-standard"

2016-08-13 Thread Brian Gerst
Now that the x86 switch_to() uses the standard C calling convention,
STACK_FRAME_NON_STANDARD is no longer needed.

Suggested-by: Josh Poimboeuf 
Signed-off-by: Brian Gerst 
---
 kernel/sched/core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3b6b23c..dbf73db 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3384,7 +3384,6 @@ static void __sched notrace __schedule(bool preempt)
 
balance_callback(rq);
 }
-STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
 
 static inline void sched_submit_work(struct task_struct *tsk)
 {
-- 
2.5.5



[PATCH v3 0/7] x86: Rewrite switch_to()

2016-08-13 Thread Brian Gerst
This patch set simplifies the switch_to() code, by moving the stack switch
code out of line into an asm stub before calling __switch_to().  This ends
up being more readable, and using the C calling convention instead of
clobbering all registers improves code generation.  It also allows newly
forked processes to construct a special stack frame to seamlessly flow
to ret_from_fork, instead of using a test and branch, or an unbalanced
call/ret.

Changes from v2:
- Updated comments around kernel threads being uncommon for fork, etc.
- Removed STACK_FRAME_NON_STANDARD annotation from __schedule() per Josh 
Poimboeuf
- A few minor cleanups added

Changes from v1:
- Added struct inactive_task_frame
- Added comments about kernel threads returning to userspace
- Cleaned up some incorrect uses of thread.sp
- Rearranged inactive stack frame so that BP (frame pointer) is in the natural 
position right below the return address.  This should take care of unwinding 
issues Josh raised.

Brian Gerst (7):
  x86-32, kgdb: Don't use thread.ip in sleeping_thread_to_gdb_regs()
  x86-64, kgdb: clear GDB_PS on 64-bit
  x86: Add struct inactive_task_frame
  x86: Rewrite switch_to() code
  x86: Pass kernel thread parameters in fork_frame
  x86: Fix thread_saved_pc()
  Revert "sched: Mark __schedule() stack frame as non-standard"

 arch/x86/entry/entry_32.S  |  68 +-
 arch/x86/entry/entry_64.S  |  78 ++--
 arch/x86/include/asm/processor.h   |  13 +---
 arch/x86/include/asm/stacktrace.h  |   4 +-
 arch/x86/include/asm/switch_to.h   | 144 -
 arch/x86/include/asm/thread_info.h |   2 -
 arch/x86/kernel/asm-offsets.c  |   6 ++
 arch/x86/kernel/asm-offsets_32.c   |   5 ++
 arch/x86/kernel/asm-offsets_64.c   |   5 ++
 arch/x86/kernel/kgdb.c |   8 +--
 arch/x86/kernel/process.c  |  14 +++-
 arch/x86/kernel/process_32.c   |  31 +++-
 arch/x86/kernel/process_64.c   |  21 +++---
 arch/x86/kernel/smpboot.c  |   1 -
 kernel/sched/core.c|   1 -
 15 files changed, 190 insertions(+), 211 deletions(-)

-- 
2.5.5



[PATCH v3 1/7] x86-32, kgdb: Don't use thread.ip in sleeping_thread_to_gdb_regs()

2016-08-13 Thread Brian Gerst
Match 64-bit and set gdb_regs[GDB_PC] to zero.  thread.ip is always the
same point in the scheduler (except for newly forked processes), and will
be removed in a future patch.

Signed-off-by: Brian Gerst 
---
 arch/x86/kernel/kgdb.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 04cde52..fe649a5 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -172,7 +172,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, 
struct task_struct *p)
gdb_regs[GDB_ES]= __KERNEL_DS;
gdb_regs[GDB_PS]= 0;
gdb_regs[GDB_CS]= __KERNEL_CS;
-   gdb_regs[GDB_PC]= p->thread.ip;
gdb_regs[GDB_SS]= __KERNEL_DS;
gdb_regs[GDB_FS]= 0x;
gdb_regs[GDB_GS]= 0x;
@@ -180,7 +179,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, 
struct task_struct *p)
gdb_regs32[GDB_PS]  = *(unsigned long *)(p->thread.sp + 8);
gdb_regs32[GDB_CS]  = __KERNEL_CS;
gdb_regs32[GDB_SS]  = __KERNEL_DS;
-   gdb_regs[GDB_PC]= 0;
gdb_regs[GDB_R8]= 0;
gdb_regs[GDB_R9]= 0;
gdb_regs[GDB_R10]   = 0;
@@ -190,6 +188,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, 
struct task_struct *p)
gdb_regs[GDB_R14]   = 0;
gdb_regs[GDB_R15]   = 0;
 #endif
+   gdb_regs[GDB_PC]= 0;
gdb_regs[GDB_SP]= p->thread.sp;
 }
 
-- 
2.5.5



[PATCH v3 0/7] x86: Rewrite switch_to()

2016-08-13 Thread Brian Gerst
This patch set simplifies the switch_to() code, by moving the stack switch
code out of line into an asm stub before calling __switch_to().  This ends
up being more readable, and using the C calling convention instead of
clobbering all registers improves code generation.  It also allows newly
forked processes to construct a special stack frame to seamlessly flow
to ret_from_fork, instead of using a test and branch, or an unbalanced
call/ret.

Changes from v2:
- Updated comments around kernel threads being uncommon for fork, etc.
- Removed STACK_FRAME_NON_STANDARD annotation from __schedule() per Josh 
Poimboeuf
- A few minor cleanups added

Changes from v1:
- Added struct inactive_task_frame
- Added comments about kernel threads returning to userspace
- Cleaned up some incorrect uses of thread.sp
- Rearranged inactive stack frame so that BP (frame pointer) is in the natural 
position right below the return address.  This should take care of unwinding 
issues Josh raised.

Brian Gerst (7):
  x86-32, kgdb: Don't use thread.ip in sleeping_thread_to_gdb_regs()
  x86-64, kgdb: clear GDB_PS on 64-bit
  x86: Add struct inactive_task_frame
  x86: Rewrite switch_to() code
  x86: Pass kernel thread parameters in fork_frame
  x86: Fix thread_saved_pc()
  Revert "sched: Mark __schedule() stack frame as non-standard"

 arch/x86/entry/entry_32.S  |  68 +-
 arch/x86/entry/entry_64.S  |  78 ++--
 arch/x86/include/asm/processor.h   |  13 +---
 arch/x86/include/asm/stacktrace.h  |   4 +-
 arch/x86/include/asm/switch_to.h   | 144 -
 arch/x86/include/asm/thread_info.h |   2 -
 arch/x86/kernel/asm-offsets.c  |   6 ++
 arch/x86/kernel/asm-offsets_32.c   |   5 ++
 arch/x86/kernel/asm-offsets_64.c   |   5 ++
 arch/x86/kernel/kgdb.c |   8 +--
 arch/x86/kernel/process.c  |  14 +++-
 arch/x86/kernel/process_32.c   |  31 +++-
 arch/x86/kernel/process_64.c   |  21 +++---
 arch/x86/kernel/smpboot.c  |   1 -
 kernel/sched/core.c|   1 -
 15 files changed, 190 insertions(+), 211 deletions(-)

-- 
2.5.5



[PATCH v3 1/7] x86-32, kgdb: Don't use thread.ip in sleeping_thread_to_gdb_regs()

2016-08-13 Thread Brian Gerst
Match 64-bit and set gdb_regs[GDB_PC] to zero.  thread.ip is always the
same point in the scheduler (except for newly forked processes), and will
be removed in a future patch.

Signed-off-by: Brian Gerst 
---
 arch/x86/kernel/kgdb.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 04cde52..fe649a5 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -172,7 +172,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, 
struct task_struct *p)
gdb_regs[GDB_ES]= __KERNEL_DS;
gdb_regs[GDB_PS]= 0;
gdb_regs[GDB_CS]= __KERNEL_CS;
-   gdb_regs[GDB_PC]= p->thread.ip;
gdb_regs[GDB_SS]= __KERNEL_DS;
gdb_regs[GDB_FS]= 0x;
gdb_regs[GDB_GS]= 0x;
@@ -180,7 +179,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, 
struct task_struct *p)
gdb_regs32[GDB_PS]  = *(unsigned long *)(p->thread.sp + 8);
gdb_regs32[GDB_CS]  = __KERNEL_CS;
gdb_regs32[GDB_SS]  = __KERNEL_DS;
-   gdb_regs[GDB_PC]= 0;
gdb_regs[GDB_R8]= 0;
gdb_regs[GDB_R9]= 0;
gdb_regs[GDB_R10]   = 0;
@@ -190,6 +188,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, 
struct task_struct *p)
gdb_regs[GDB_R14]   = 0;
gdb_regs[GDB_R15]   = 0;
 #endif
+   gdb_regs[GDB_PC]= 0;
gdb_regs[GDB_SP]= p->thread.sp;
 }
 
-- 
2.5.5



[PATCH v3 2/7] x86-64, kgdb: clear GDB_PS on 64-bit

2016-08-13 Thread Brian Gerst
switch_to() no longer saves EFLAGS, so it's bogus to look for it on the
stack.  Set it to zero like 32-bit.

Signed-off-by: Brian Gerst 
---
 arch/x86/kernel/kgdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index fe649a5..5e3f294 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -176,7 +176,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, 
struct task_struct *p)
gdb_regs[GDB_FS]= 0x;
gdb_regs[GDB_GS]= 0x;
 #else
-   gdb_regs32[GDB_PS]  = *(unsigned long *)(p->thread.sp + 8);
+   gdb_regs32[GDB_PS]  = 0;
gdb_regs32[GDB_CS]  = __KERNEL_CS;
gdb_regs32[GDB_SS]  = __KERNEL_DS;
gdb_regs[GDB_R8]= 0;
-- 
2.5.5



[PATCH v3 2/7] x86-64, kgdb: clear GDB_PS on 64-bit

2016-08-13 Thread Brian Gerst
switch_to() no longer saves EFLAGS, so it's bogus to look for it on the
stack.  Set it to zero like 32-bit.

Signed-off-by: Brian Gerst 
---
 arch/x86/kernel/kgdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index fe649a5..5e3f294 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -176,7 +176,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, 
struct task_struct *p)
gdb_regs[GDB_FS]= 0x;
gdb_regs[GDB_GS]= 0x;
 #else
-   gdb_regs32[GDB_PS]  = *(unsigned long *)(p->thread.sp + 8);
+   gdb_regs32[GDB_PS]  = 0;
gdb_regs32[GDB_CS]  = __KERNEL_CS;
gdb_regs32[GDB_SS]  = __KERNEL_DS;
gdb_regs[GDB_R8]= 0;
-- 
2.5.5



[PATCH 8/8] power: ds2760_battery: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

The workqueue "monitor_wqueue" is used to monitor the battery
status. It has been identity converted.

It queues multiple work items viz &di->monitor_work,
&di->set_charged_work, which require execution ordering.
Hence, alloc_ordered_workqueue has been used to replace the
deprecated create_singlethread_workqueue instance.

WQ_MEM_RECLAIM flag has been set to ensure forward progress under
memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/ds2760_battery.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 80f73cc..ac92e80 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -566,7 +566,8 @@ static int ds2760_battery_probe(struct platform_device 
*pdev)
INIT_DELAYED_WORK(>monitor_work, ds2760_battery_work);
INIT_DELAYED_WORK(>set_charged_work,
  ds2760_battery_set_charged_work);
-   di->monitor_wqueue = 
create_singlethread_workqueue(dev_name(>dev));
+   di->monitor_wqueue = alloc_ordered_workqueue(dev_name(>dev),
+WQ_MEM_RECLAIM);
if (!di->monitor_wqueue) {
retval = -ESRCH;
goto workqueue_failed;
--
2.1.4



[PATCH 8/8] power: ds2760_battery: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

The workqueue "monitor_wqueue" is used to monitor the battery
status. It has been identity converted.

It queues multiple work items viz &di->monitor_work,
&di->set_charged_work, which require execution ordering.
Hence, alloc_ordered_workqueue has been used to replace the
deprecated create_singlethread_workqueue instance.

WQ_MEM_RECLAIM flag has been set to ensure forward progress under
memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/ds2760_battery.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 80f73cc..ac92e80 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -566,7 +566,8 @@ static int ds2760_battery_probe(struct platform_device 
*pdev)
INIT_DELAYED_WORK(>monitor_work, ds2760_battery_work);
INIT_DELAYED_WORK(>set_charged_work,
  ds2760_battery_set_charged_work);
-   di->monitor_wqueue = 
create_singlethread_workqueue(dev_name(>dev));
+   di->monitor_wqueue = alloc_ordered_workqueue(dev_name(>dev),
+WQ_MEM_RECLAIM);
if (!di->monitor_wqueue) {
retval = -ESRCH;
goto workqueue_failed;
--
2.1.4



[PATCH 7/8] power: ab8500_fg: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

The workqueue "fg_wq" is used for running the FG algorithm periodically.
It has been identity converted.

It has multiple work items viz fg_periodic_work, fg_low_bat_work,
fg_reinit_work, fg_work, fg_acc_cur_work and fg_check_hw_failure_work,
which require execution ordering. Hence, a dedicated ordered workqueue
has been used here.

The WQ_MEM_RECLAIM flag has been set to guarantee forward progress under
memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/ab8500_fg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index 5a36cf8..199f2db 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -3096,7 +3096,7 @@ static int ab8500_fg_probe(struct platform_device *pdev)
ab8500_fg_discharge_state_to(di, AB8500_FG_DISCHARGE_INIT);

/* Create a work queue for running the FG algorithm */
-   di->fg_wq = create_singlethread_workqueue("ab8500_fg_wq");
+   di->fg_wq = alloc_ordered_workqueue("ab8500_fg_wq", WQ_MEM_RECLAIM);
if (di->fg_wq == NULL) {
dev_err(di->dev, "failed to create work queue\n");
return -ENOMEM;
--
2.1.4



[PATCH 7/8] power: ab8500_fg: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

The workqueue "fg_wq" is used for running the FG algorithm periodically.
It has been identity converted.

It has multiple work items viz fg_periodic_work, fg_low_bat_work,
fg_reinit_work, fg_work, fg_acc_cur_work and fg_check_hw_failure_work,
which require execution ordering. Hence, a dedicated ordered workqueue
has been used here.

The WQ_MEM_RECLAIM flag has been set to guarantee forward progress under
memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/ab8500_fg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index 5a36cf8..199f2db 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -3096,7 +3096,7 @@ static int ab8500_fg_probe(struct platform_device *pdev)
ab8500_fg_discharge_state_to(di, AB8500_FG_DISCHARGE_INIT);

/* Create a work queue for running the FG algorithm */
-   di->fg_wq = create_singlethread_workqueue("ab8500_fg_wq");
+   di->fg_wq = alloc_ordered_workqueue("ab8500_fg_wq", WQ_MEM_RECLAIM);
if (di->fg_wq == NULL) {
dev_err(di->dev, "failed to create work queue\n");
return -ENOMEM;
--
2.1.4



[PATCH 6/8] power: ipaq_micro_battery: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
The workqueue "wq" is used for handling battery related tasks.

It has a single work item viz &mb->update and hence it doesn't require
execution ordering. Hence, alloc_workqueue has been used to replace the
deprecated create_singlethread_workqueue instance.

The WQ_MEM_RECLAIM flag has been set to ensure forward progress under
memory pressure.

Since there is a single work item, explicit concurrency
limit is unnecessary here.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/ipaq_micro_battery.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/ipaq_micro_battery.c 
b/drivers/power/ipaq_micro_battery.c
index 35b01c7..4af7b77 100644
--- a/drivers/power/ipaq_micro_battery.c
+++ b/drivers/power/ipaq_micro_battery.c
@@ -235,7 +235,7 @@ static int micro_batt_probe(struct platform_device *pdev)
return -ENOMEM;

mb->micro = dev_get_drvdata(pdev->dev.parent);
-   mb->wq = create_singlethread_workqueue("ipaq-battery-wq");
+   mb->wq = alloc_workqueue("ipaq-battery-wq", WQ_MEM_RECLAIM, 0);
if (!mb->wq)
return -ENOMEM;

--
2.1.4



[PATCH 6/8] power: ipaq_micro_battery: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
The workqueue "wq" is used for handling battery related tasks.

It has a single work item viz &mb->update and hence it doesn't require
execution ordering. Hence, alloc_workqueue has been used to replace the
deprecated create_singlethread_workqueue instance.

The WQ_MEM_RECLAIM flag has been set to ensure forward progress under
memory pressure.

Since there is a single work item, explicit concurrency
limit is unnecessary here.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/ipaq_micro_battery.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/ipaq_micro_battery.c 
b/drivers/power/ipaq_micro_battery.c
index 35b01c7..4af7b77 100644
--- a/drivers/power/ipaq_micro_battery.c
+++ b/drivers/power/ipaq_micro_battery.c
@@ -235,7 +235,7 @@ static int micro_batt_probe(struct platform_device *pdev)
return -ENOMEM;

mb->micro = dev_get_drvdata(pdev->dev.parent);
-   mb->wq = create_singlethread_workqueue("ipaq-battery-wq");
+   mb->wq = alloc_workqueue("ipaq-battery-wq", WQ_MEM_RECLAIM, 0);
if (!mb->wq)
return -ENOMEM;

--
2.1.4



Re: [PATCH] sched: Avoid that __wait_on_bit_lock() hangs

2016-08-13 Thread Oleg Nesterov
On 08/12, Bart Van Assche wrote:
>
> On 08/12/2016 09:16 AM, Oleg Nesterov wrote:
> > Please drop two patches I sent before and try the new one below.
>
> Hello Oleg,
>
> Thanks for the patch. In addition to your patch I also applied the
> attached two patches

And I guess you did this because you think we do not have enough
confusion so you decided to add a bit more ;)

Could you please test my patch alone without additional changes?

> before I started testing. It took some time
> before I could reproduce the hang in truncate_inode_pages_range().

all I can say is that this contradicts the previous testing results with
my previous patch or with your change in abort_exclusive_wait().

> +int __lock_page_impl(struct page *page, int mode)
> +{
> + struct page *page_head = compound_head(page);
> + DEFINE_WAIT_BIT(wait, _head->flags, PG_locked);
> + struct task_struct *owner;
> + int res;
> +
> + for (;;) {
> + wait.key.timeout = jiffies + 30 * HZ;
> + res = __wait_on_bit_lock(page_waitqueue(page_head),
> +  , bit_wait_io_timeout, mode);
> + if (res == 0) {
> + set_page_lock_owner(page, current);

this is not right, you should use page_head. Although I doubt this can
make a difference in this case. The same for get_page_lock_owner() below.

> + break;
> + }
> + if (res == -EINTR)
> + break;
> + owner = get_page_lock_owner(page);
> + pr_info("%s / pid %d / m %#x: %s - continuing to wait for %d\n",
> + __func__, task_pid_nr(current), mode, res == -EAGAIN ?
> + "timeout" : "interrupted",
> + owner ? task_pid_nr(owner) : 0);

I thought about the similar debugging patch too. But this is not what
we need. Note that if res == -EAGAIN then another exclusive waiter was
already woken and it can lock this page and set get_page_lock_owner().
So this can't actually help if the problem is the missed/lost wakeup.

Not that it explains the strange dmesg you reported. Perhaps your patch
has other bugs, or my patch is buggy, or both. Please do not mix them.

As for "add the timeout" idea it makes sense too and perhaps we will test
this later, but we can start with the much more simple patch.

Oleg.



Re: [PATCH] sched: Avoid that __wait_on_bit_lock() hangs

2016-08-13 Thread Oleg Nesterov
On 08/12, Bart Van Assche wrote:
>
> On 08/12/2016 09:16 AM, Oleg Nesterov wrote:
> > Please drop two patches I sent before and try the new one below.
>
> Hello Oleg,
>
> Thanks for the patch. In addition to your patch I also applied the
> attached two patches

And I guess you did this because you think we do not have enough
confusion so you decided to add a bit more ;)

Could you please test my patch alone without additional changes?

> before I started testing. It took some time
> before I could reproduce the hang in truncate_inode_pages_range().

all I can say is that this contradicts the previous testing results with
my previous patch or with your change in abort_exclusive_wait().

> +int __lock_page_impl(struct page *page, int mode)
> +{
> + struct page *page_head = compound_head(page);
> + DEFINE_WAIT_BIT(wait, _head->flags, PG_locked);
> + struct task_struct *owner;
> + int res;
> +
> + for (;;) {
> + wait.key.timeout = jiffies + 30 * HZ;
> + res = __wait_on_bit_lock(page_waitqueue(page_head),
> +  , bit_wait_io_timeout, mode);
> + if (res == 0) {
> + set_page_lock_owner(page, current);

this is not right, you should use page_head. Although I doubt this can
make a difference in this case. The same for get_page_lock_owner() below.

> + break;
> + }
> + if (res == -EINTR)
> + break;
> + owner = get_page_lock_owner(page);
> + pr_info("%s / pid %d / m %#x: %s - continuing to wait for %d\n",
> + __func__, task_pid_nr(current), mode, res == -EAGAIN ?
> + "timeout" : "interrupted",
> + owner ? task_pid_nr(owner) : 0);

I thought about the similar debugging patch too. But this is not what
we need. Note that if res == -EAGAIN then another exclusive waiter was
already woken and it can lock this page and set get_page_lock_owner().
So this can't actually help if the problem is the missed/lost wakeup.

Not that it explains the strange dmesg you reported. Perhaps your patch
has other bugs, or my patch is buggy, or both. Please do not mix them.

As for "add the timeout" idea it makes sense too and perhaps we will test
this later, but we can start with the much more simple patch.

Oleg.



[PATCH 5/8] power: ab8500_charger: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

The workqueue "charger_wq" is used for the IRQs and checking HW state of
the charger. It has been identity converted.

It has multiple work items viz usb_charger_attached_work, kick_wd_work,
check_vbat_work, check_hw_failure_work, usb_charger_attached_work,
ac_work, ac_charger_attached_work, attach_work and check_usbchgnotok_work,
which require execution ordering. Hence, a dedicated ordered workqueue
has been used here.

The WQ_MEM_RECLAIM flag has also been set to ensure
forward progress under memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/ab8500_charger.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 30de5d4..5cee9aa 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -3540,8 +3540,8 @@ static int ab8500_charger_probe(struct platform_device 
*pdev)
di->usb_state.usb_current = -1;

/* Create a work queue for the charger */
-   di->charger_wq =
-   create_singlethread_workqueue("ab8500_charger_wq");
+   di->charger_wq = alloc_ordered_workqueue("ab8500_charger_wq",
+WQ_MEM_RECLAIM);
if (di->charger_wq == NULL) {
dev_err(di->dev, "failed to create work queue\n");
return -ENOMEM;
--
2.1.4



[PATCH 5/8] power: ab8500_charger: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

The workqueue "charger_wq" is used for the IRQs and checking HW state of
the charger. It has been identity converted.

It has multiple work items viz usb_charger_attached_work, kick_wd_work,
check_vbat_work, check_hw_failure_work, usb_charger_attached_work,
ac_work, ac_charger_attached_work, attach_work and check_usbchgnotok_work,
which require execution ordering. Hence, a dedicated ordered workqueue
has been used here.

The WQ_MEM_RECLAIM flag has also been set to ensure
forward progress under memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/ab8500_charger.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 30de5d4..5cee9aa 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -3540,8 +3540,8 @@ static int ab8500_charger_probe(struct platform_device 
*pdev)
di->usb_state.usb_current = -1;

/* Create a work queue for the charger */
-   di->charger_wq =
-   create_singlethread_workqueue("ab8500_charger_wq");
+   di->charger_wq = alloc_ordered_workqueue("ab8500_charger_wq",
+WQ_MEM_RECLAIM);
if (di->charger_wq == NULL) {
dev_err(di->dev, "failed to create work queue\n");
return -ENOMEM;
--
2.1.4



[PATCH 4/8] power: intel_mid_battery: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
The workqueue "monitor_wqueue" is used to monitor the PMIC battery status.
It queues a single work item (pbi->monitor_battery) and hence doesn't
require ordering. Hence, alloc_workqueue has been used to replace the
deprecated create_singlethread_workqueue instance.

Since PMIC battery status needs to be monitored for any change, the
WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory
pressure.

Since there is a single work item, explicit concurrency
limit is unnecessary here.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/intel_mid_battery.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/power/intel_mid_battery.c 
b/drivers/power/intel_mid_battery.c
index 9fa4acc..dc7feef 100644
--- a/drivers/power/intel_mid_battery.c
+++ b/drivers/power/intel_mid_battery.c
@@ -689,8 +689,7 @@ static int probe(int irq, struct device *dev)
/* initialize all required framework before enabling interrupts */
INIT_WORK(>handler, pmic_battery_handle_intrpt);
INIT_DELAYED_WORK(>monitor_battery, pmic_battery_monitor);
-   pbi->monitor_wqueue =
-   create_singlethread_workqueue(dev_name(dev));
+   pbi->monitor_wqueue = alloc_workqueue(dev_name(dev), WQ_MEM_RECLAIM, 0);
if (!pbi->monitor_wqueue) {
dev_err(dev, "%s(): wqueue init failed\n", __func__);
retval = -ESRCH;
--
2.1.4



[PATCH 4/8] power: intel_mid_battery: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
The workqueue "monitor_wqueue" is used to monitor the PMIC battery status.
It queues a single work item (pbi->monitor_battery) and hence doesn't
require ordering. Hence, alloc_workqueue has been used to replace the
deprecated create_singlethread_workqueue instance.

Since PMIC battery status needs to be monitored for any change, the
WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory
pressure.

Since there is a single work item, explicit concurrency
limit is unnecessary here.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/intel_mid_battery.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/power/intel_mid_battery.c 
b/drivers/power/intel_mid_battery.c
index 9fa4acc..dc7feef 100644
--- a/drivers/power/intel_mid_battery.c
+++ b/drivers/power/intel_mid_battery.c
@@ -689,8 +689,7 @@ static int probe(int irq, struct device *dev)
/* initialize all required framework before enabling interrupts */
INIT_WORK(>handler, pmic_battery_handle_intrpt);
INIT_DELAYED_WORK(>monitor_battery, pmic_battery_monitor);
-   pbi->monitor_wqueue =
-   create_singlethread_workqueue(dev_name(dev));
+   pbi->monitor_wqueue = alloc_workqueue(dev_name(dev), WQ_MEM_RECLAIM, 0);
if (!pbi->monitor_wqueue) {
dev_err(dev, "%s(): wqueue init failed\n", __func__);
retval = -ESRCH;
--
2.1.4



[PATCH 3/8] power: pm2301_charger: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

The workqueue "charger_wq" is used for running all the charger related
tasks. This involves charger detection, checking for HW failure and HW
status. This workqueue has been identity converted.

It queues multiple workitems viz &pm2->check_main_thermal_prot_work,
&pm2->check_hw_failure_work, &pm2->ac_work. Hence, the deprecated
create_singlethread_workqueue() instance has been replaced with a
dedicated ordered workqueue.

The WQ_MEM_RECLAIM flag has been set to ensure forward progress under
memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/pm2301_charger.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c
index fb62ed3..78561b6 100644
--- a/drivers/power/pm2301_charger.c
+++ b/drivers/power/pm2301_charger.c
@@ -1054,7 +1054,8 @@ static int pm2xxx_wall_charger_probe(struct i2c_client 
*i2c_client,
pm2->ac_chg.external = true;

/* Create a work queue for the charger */
-   pm2->charger_wq = create_singlethread_workqueue("pm2xxx_charger_wq");
+   pm2->charger_wq = alloc_ordered_workqueue("pm2xxx_charger_wq",
+ WQ_MEM_RECLAIM);
if (pm2->charger_wq == NULL) {
ret = -ENOMEM;
dev_err(pm2->dev, "failed to create work queue\n");
--
2.1.4



[PATCH 3/8] power: pm2301_charger: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

The workqueue "charger_wq" is used for running all the charger related
tasks. This involves charger detection, checking for HW failure and HW
status. This workqueue has been identity converted.

It queues multiple workitems viz &pm2->check_main_thermal_prot_work,
&pm2->check_hw_failure_work, &pm2->ac_work. Hence, the deprecated
create_singlethread_workqueue() instance has been replaced with a
dedicated ordered workqueue.

The WQ_MEM_RECLAIM flag has been set to ensure forward progress under
memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/pm2301_charger.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c
index fb62ed3..78561b6 100644
--- a/drivers/power/pm2301_charger.c
+++ b/drivers/power/pm2301_charger.c
@@ -1054,7 +1054,8 @@ static int pm2xxx_wall_charger_probe(struct i2c_client 
*i2c_client,
pm2->ac_chg.external = true;

/* Create a work queue for the charger */
-   pm2->charger_wq = create_singlethread_workqueue("pm2xxx_charger_wq");
+   pm2->charger_wq = alloc_ordered_workqueue("pm2xxx_charger_wq",
+ WQ_MEM_RECLAIM);
if (pm2->charger_wq == NULL) {
ret = -ENOMEM;
dev_err(pm2->dev, "failed to create work queue\n");
--
2.1.4



[PATCH 2/8] power: ab8500_btemp: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
The workqueue "btemp_wq" is used for measuring the temperature
periodically. It queues a single workitem (btemp_periodic_work) and
hence doesn't require ordering. Thus, the deprecated
create_singlethread_workqueue() instance has been replaced with
alloc_workqueue().

The WQ_MEM_RECLAIM flag has been set to ensure forward progress under
memory pressure.

Since there is a single work item, explicit concurrency
limit is unnecessary here.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/ab8500_btemp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index bf2e5dd..6ffdc18 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -1095,7 +1095,7 @@ static int ab8500_btemp_probe(struct platform_device 
*pdev)

/* Create a work queue for the btemp */
di->btemp_wq =
-   create_singlethread_workqueue("ab8500_btemp_wq");
+   alloc_workqueue("ab8500_btemp_wq", WQ_MEM_RECLAIM, 0);
if (di->btemp_wq == NULL) {
dev_err(di->dev, "failed to create work queue\n");
return -ENOMEM;
--
2.1.4



[PATCH 2/8] power: ab8500_btemp: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
The workqueue "btemp_wq" is used for measuring the temperature
periodically. It queues a single workitem (btemp_periodic_work) and
hence doesn't require ordering. Thus, the deprecated
create_singlethread_workqueue() instance has been replaced with
alloc_workqueue().

The WQ_MEM_RECLAIM flag has been set to ensure forward progress under
memory pressure.

Since there is a single work item, explicit concurrency
limit is unnecessary here.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/ab8500_btemp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index bf2e5dd..6ffdc18 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -1095,7 +1095,7 @@ static int ab8500_btemp_probe(struct platform_device 
*pdev)

/* Create a work queue for the btemp */
di->btemp_wq =
-   create_singlethread_workqueue("ab8500_btemp_wq");
+   alloc_workqueue("ab8500_btemp_wq", WQ_MEM_RECLAIM, 0);
if (di->btemp_wq == NULL) {
dev_err(di->dev, "failed to create work queue\n");
return -ENOMEM;
--
2.1.4



[PATCH 1/8] power: abx500_chargalg: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

The workqueue "chargalg_wq" is used for running the charging algorithm.
It has multiple workitems viz &di->chargalg_periodic_work,
&di->chargalg_wd_work, &di->chargalg_work per abx500_chargalg, which
require ordering. It has been identity converted.

Also, WQ_MEM_RECLAIM has been set to ensure forward progress under
memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/abx500_chargalg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index d9104b1..a4411d6 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -2091,8 +2091,8 @@ static int abx500_chargalg_probe(struct platform_device 
*pdev)
abx500_chargalg_maintenance_timer_expired;

/* Create a work queue for the chargalg */
-   di->chargalg_wq =
-   create_singlethread_workqueue("abx500_chargalg_wq");
+   di->chargalg_wq = alloc_ordered_workqueue("abx500_chargalg_wq",
+  WQ_MEM_RECLAIM);
if (di->chargalg_wq == NULL) {
dev_err(di->dev, "failed to create work queue\n");
return -ENOMEM;
--
2.1.4



[PATCH 1/8] power: abx500_chargalg: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces
deprecated create_singlethread_workqueue(). This is the identity
conversion.

The workqueue "chargalg_wq" is used for running the charging algorithm.
It has multiple workitems viz &di->chargalg_periodic_work,
&di->chargalg_wd_work, &di->chargalg_work per abx500_chargalg, which
require ordering. It has been identity converted.

Also, WQ_MEM_RECLAIM has been set to ensure forward progress under
memory pressure.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/power/abx500_chargalg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index d9104b1..a4411d6 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -2091,8 +2091,8 @@ static int abx500_chargalg_probe(struct platform_device 
*pdev)
abx500_chargalg_maintenance_timer_expired;

/* Create a work queue for the chargalg */
-   di->chargalg_wq =
-   create_singlethread_workqueue("abx500_chargalg_wq");
+   di->chargalg_wq = alloc_ordered_workqueue("abx500_chargalg_wq",
+  WQ_MEM_RECLAIM);
if (di->chargalg_wq == NULL) {
dev_err(di->dev, "failed to create work queue\n");
return -ENOMEM;
--
2.1.4



[PATCH 0/8] Remove deprecated workqueue interface users

2016-08-13 Thread Bhaktipriya Shridhar
This patch set removes the instances of deprecated
create_singlethread_workqueues in drivers/power by making the appropriate
conversions.

Bhaktipriya Shridhar (8):
  power: abx500_chargalg: Remove deprecated create_singlethread_workqueue
  power: ab8500_btemp: Remove deprecated create_singlethread_workqueue
  power: pm2301_charger: Remove deprecated create_singlethread_workqueue
  power: intel_mid_battery: Remove deprecated create_singlethread_workqueue
  power: ab8500_charger: Remove deprecated create_singlethread_workqueue
  power: ipaq_micro_battery: Remove deprecated create_singlethread_workqueue
  power: ab8500_fg: Remove deprecated create_singlethread_workqueue
  power: ds2760_battery: Remove deprecated create_singlethread_workqueue

 drivers/power/ab8500_btemp.c   | 2 +-
 drivers/power/ab8500_charger.c | 4 ++--
 drivers/power/ab8500_fg.c  | 2 +-
 drivers/power/abx500_chargalg.c| 4 ++--
 drivers/power/ds2760_battery.c | 3 ++-
 drivers/power/intel_mid_battery.c  | 3 +--
 drivers/power/ipaq_micro_battery.c | 2 +-
 drivers/power/pm2301_charger.c | 3 ++-
 8 files changed, 12 insertions(+), 11 deletions(-)

--
2.1.4



[PATCH 0/8] Remove deprecated workqueue interface users

2016-08-13 Thread Bhaktipriya Shridhar
This patch set removes the instances of deprecated
create_singlethread_workqueues in drivers/power by making the appropriate
conversions.

Bhaktipriya Shridhar (8):
  power: abx500_chargalg: Remove deprecated create_singlethread_workqueue
  power: ab8500_btemp: Remove deprecated create_singlethread_workqueue
  power: pm2301_charger: Remove deprecated create_singlethread_workqueue
  power: intel_mid_battery: Remove deprecated create_singlethread_workqueue
  power: ab8500_charger: Remove deprecated create_singlethread_workqueue
  power: ipaq_micro_battery: Remove deprecated create_singlethread_workqueue
  power: ab8500_fg: Remove deprecated create_singlethread_workqueue
  power: ds2760_battery: Remove deprecated create_singlethread_workqueue

 drivers/power/ab8500_btemp.c   | 2 +-
 drivers/power/ab8500_charger.c | 4 ++--
 drivers/power/ab8500_fg.c  | 2 +-
 drivers/power/abx500_chargalg.c| 4 ++--
 drivers/power/ds2760_battery.c | 3 ++-
 drivers/power/intel_mid_battery.c  | 3 +--
 drivers/power/ipaq_micro_battery.c | 2 +-
 drivers/power/pm2301_charger.c | 3 ++-
 8 files changed, 12 insertions(+), 11 deletions(-)

--
2.1.4



Re: [RFC PATCH 0/3] Documentation: switch to pdflatex and fix pdf build

2016-08-13 Thread Markus Heiser

Am 13.08.2016 um 00:40 schrieb Jonathan Corbet :

> On Wed, 10 Aug 2016 18:54:06 +0300
> Jani Nikula  wrote:
> 
>> With these you should be able to get started with pdf generation. It's a
>> quick transition to pdflatex, the patches are not very pretty, but the
>> pdf output is. Patch 3/3 works as an example where to add your stuff
>> (latex_documents in conf.py) and how.
> 
> OK, now I have a bone to pick with you.
> 
> I applied this, then decided to install the needed toolchain on the
> Tumbleweed system I've been playing with; it wanted to install 1,727
> packages to get pdflatex.  Pandoc just doesn't seem so bad anymore.

I'm completely disenchanted with this topic. My experience is:

1) You won't get any reasonable typesetting engine which preserves
your disk space. I don't know how many files or packages are installed,
the only thing I know is, a TeX installation is always >1GB.

2) You won't get a (pdf, ps,..) book with a perfect layout without
any handcraft or at least a *theming*. TeX has many options to influence
the layout and Sphinx provides its own LaTeX-document class (sphinxmanual)
which is IMHO awful.

> So I switched to the Fedora system, and found myself in a twisty maze of
> missing font files, missing style files, missing babel crap, etc., each
> doled out to me one file per run.  But I did eventually get PDFs out of
> it.

On debian it should be enough to install *base* and *recommended*

sudo apt-get install
  texlive-base texlive-latex-recommended

> The output isn't great; among other things, it seems to be about 1/2 blank
> pages.

1/2 ? .. I have only empty pages at the start of parts or chapters, which
is a typical layout setting.

>  But it's something.

This is the sphinxmanual document class.
 
> I've applied this so we have something to play with, but it doesn't feel
> like a great solution.  This is the sort of installation hell that we
> wanted to get away from.  

See above, on debian it should be enough to install the two meta packages.

> It makes me wonder how hard it can really be to
> fix rst2pdf; I wish I could say I'll find some time to figure that out.
> Sigh.

I gave it a try, but as I come closer to the sources I realized that
it is hair-raising. I looked at the issues, added a comment to a related
issue, a few days later the issue was closed without any comment or code
change.

https://github.com/rst2pdf/rst2pdf/issues/556#issuecomment-228779542

My advice, if you don't like to waste your time: forget it.

Some thoughts of mine, written in an earlier mail:

> The sphinx-doc build-in LaTeX builder
> 
> * http://www.sphinx-doc.org/en/stable/config.html#options-for-latex-output
> 
> has some drawbacks, e.g. it produces LaTeX for the pdfTeX engine.
> LaTeX is by default ASCII and it needs some "inputenc" to support a wider
> range of characters. This is not very helpful if you have a toolchain
> in an international community.
> 
> The alternative to LaTeX is to use the XeTeX engine, which supports UTF-8
> encoded input by default and supports TrueType/OpenType fonts directly.
> That's why I started to write a XeLaTeX builder ...
> 
> * 
> https://github.com/return42/sphkerneldoc/blob/master/scripts/site-python/xelatex_ext/__init__.py#L15
>  
> 
> ... but I can't predict when this will be finished ...
> 
> However which tool is used, my experience is, that building
> PDF (books) with a minimum of quality is not simple.
> Layout with tables, split table content over pages, switch
> from landscape to portrait and vice versa, the flow of objects etc.
> .. all this will need some manual interventions.


-- Markus --












Re: [RFC PATCH 0/3] Documentation: switch to pdflatex and fix pdf build

2016-08-13 Thread Markus Heiser

Am 13.08.2016 um 00:40 schrieb Jonathan Corbet :

> On Wed, 10 Aug 2016 18:54:06 +0300
> Jani Nikula  wrote:
> 
>> With these you should be able to get started with pdf generation. It's a
>> quick transition to pdflatex, the patches are not very pretty, but the
>> pdf output is. Patch 3/3 works as an example where to add your stuff
>> (latex_documents in conf.py) and how.
> 
> OK, now I have a bone to pick with you.
> 
> I applied this, then decided to install the needed toolchain on the
> Tumbleweed system I've been playing with; it wanted to install 1,727
> packages to get pdflatex.  Pandoc just doesn't seem so bad anymore.

I'm completely disenchanted with this topic. My experience is:

1) You won't get any reasonable typesetting engine which preserves
your disk space. I don't know how many files or packages are installed,
the only thing I know is, a TeX installation is always >1GB.

2) You won't get a (pdf, ps,..) book with a perfect layout without
any handcraft or at least a *theming*. TeX has many options to influence
the layout and Sphinx provides its own LaTeX-document class (sphinxmanual)
which is IMHO awful.

> So I switched to the Fedora system, and found myself in a twisty maze of
> missing font files, missing style files, missing babel crap, etc., each
> doled out to me one file per run.  But I did eventually get PDFs out of
> it.

On debian it should be enough to install *base* and *recommended*

sudo apt-get install
  texlive-base texlive-latex-recommended

> The output isn't great; among other things, it seems to be about 1/2 blank
> pages.

1/2 ? .. I have only empty pages at the start of parts or chapters, which
is a typical layout setting.

>  But it's something.

This is the sphinxmanual document class.
 
> I've applied this so we have something to play with, but it doesn't feel
> like a great solution.  This is the sort of installation hell that we
> wanted to get away from.  

See above, on debian it should be enough to install the two meta packages.

> It makes me wonder how hard it can really be to
> fix rst2pdf; I wish I could say I'll find some time to figure that out.
> Sigh.

I gave it a try, but as I come closer to the sources I realized that
it is hair-raising. I looked at the issues, added a comment to a related
issue, a few days later the issue was closed without any comment or code
change.

https://github.com/rst2pdf/rst2pdf/issues/556#issuecomment-228779542

My advice, if you don't like to waste your time: forget it.

Some thoughts of mine, written in an earlier mail:

> The sphinx-doc build-in LaTeX builder
> 
> * http://www.sphinx-doc.org/en/stable/config.html#options-for-latex-output
> 
> has some drawbacks, e.g. it produces LaTeX for the pdfTeX engine.
> LaTeX is by default ASCII and it needs some "inputenc" to support a wider
> range of characters. This is not very helpful if you have a toolchain
> in an international community.
> 
> The alternative to LaTeX is to use the XeTeX engine, which supports UTF-8
> encoded input by default and supports TrueType/OpenType fonts directly.
> That's why I started to write a XeLaTeX builder ...
> 
> * 
> https://github.com/return42/sphkerneldoc/blob/master/scripts/site-python/xelatex_ext/__init__.py#L15
>  
> 
> ... but I can't predict when this will be finished ...
> 
> However which tool is used, my experience is, that building
> PDF (books) with a minimum of quality is not simple.
> Layout with tables, split table content over pages, switch
> from landscape to portrait and vice versa, the flow of objects etc.
> .. all this will need some manual interventions.


-- Markus --












RE: [RFC][PATCH 7/7] cpufreq: intel_pstate: Change P-state selection algorithm for Core

2016-08-13 Thread Doug Smythies
On 2016.08.05 17:02 Rafael J. Wysocki wrote:
>> On 2016.08.03 21:19 Doug Smythies wrote:
>>> On 2016.07.31 16:49 Rafael J. Wysocki wrote:
>>> 
>>> The PID-base P-state selection algorithm used by intel_pstate for
>>> Core processors is based on very weak foundations.
>>
>> ...[cut]...
>> 
>>> +static inline int32_t get_target_pstate_default(struct cpudata *cpu)
>>> +{
>>> +   struct sample *sample = >sample;
>>> +   int32_t busy_frac;
>>> +   int pstate;
>>> +
>>> +   busy_frac = div_fp(sample->mperf, sample->tsc);
>>> +   sample->busy_scaled = busy_frac * 100;
>>> +
>>> +   if (busy_frac < cpu->iowait_boost)
>>> +   busy_frac = cpu->iowait_boost;
>>> +
>>> +   cpu->iowait_boost >>= 1;
>>> +
>>> +   pstate = cpu->pstate.turbo_pstate;
>>> +   return fp_toint((pstate + (pstate >> 2)) * busy_frac);
>>> +}
>>> +
>>
My previous replies (and see below) have suggested that some filtering
is needed on the target pstate, otherwise, and dependent on the type of
workload, it tends to oscillate.

I added the IIR (Infinite Impulse Response) filter that I have suggested in the 
past:

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index c43ef55..262ec5f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -98,6 +98,7 @@ static inline u64 div_ext_fp(u64 x, u64 y)
  * @tsc:   Difference of time stamp counter between last and
  * current sample
  * @time:  Current time from scheduler
+ * @target:target pstate filtered.
  *
  * This structure is used in the cpudata structure to store performance sample
  * data for choosing next P State.
@@ -108,6 +109,7 @@ struct sample {
u64 aperf;
u64 mperf;
u64 tsc;
+   u64 target;
u64 time;
 };

@@ -1168,6 +1170,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata 
*cpu)
pstate_funcs.get_vid(cpu);

intel_pstate_set_min_pstate(cpu);
+   cpu->sample.target = int_tofp(cpu->pstate.min_pstate);
 }

 static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
@@ -1301,8 +1304,10 @@ static inline int32_t 
get_target_pstate_use_performance(struct cpudata *cpu)
 static inline int32_t get_target_pstate_default(struct cpudata *cpu)
 {
struct sample *sample = &cpu->sample;
+   int64_t scaled_gain, unfiltered_target;
int32_t busy_frac;
int pstate;
+   u64 duration_ns;

busy_frac = div_fp(sample->mperf, sample->tsc);
sample->busy_scaled = busy_frac * 100;
@@ -1313,7 +1318,74 @@ static inline int32_t get_target_pstate_default(struct 
cpudata *cpu)
cpu->iowait_boost >>= 1;

pstate = cpu->pstate.turbo_pstate;
-   return fp_toint((pstate + (pstate >> 2)) * busy_frac);
+   /* To Do: I think the above should be:
+*
+* if (limits.no_turbo || limits.turbo_disabled)
+*  pstate = cpu->pstate.max_pstate;
+* else
+*  pstate = cpu->pstate.turbo_pstate;
+*
+* figure it out.
+*
+* no clamps. Pre-filter clamping was needed in past implementations.
+* To Do: Is any pre-filter clamping needed here? */
+
+   unfiltered_target = (pstate + (pstate >> 2)) * busy_frac;
+
+   /*
+* Idle check.
+* We have a deferrable timer. Very long durations can be
+* either due to long idle (C0 time near 0),
+* or due to short idle times that spanned jiffy boundaries
+* (C0 time not near zero).
+*
+* To Do: As of the utilization stuff, I do not think the
+* spanning jiffy boundaries thing is true anymore.
+* Check, and fix the comment.
+*
+* The very long durations are 0.4 seconds or more.
+* Either way, a very long duration will effectively flush
+* the IIR filter, otherwise falling edge load response times
+* can be on the order of tens of seconds, because this driver
+* runs very rarely. Furthermore, for higher periodic loads that
+* just so happen to not be in the C0 state on jiffy boundaries,
+* the long ago history should be forgotten.
+* For cases of durations that are a few times the set sample
+* period, increase the IIR filter gain so as to weight
+* the current sample more appropriately.
+*
+* To Do: sample_time should be forced to be accurate. For
+* example if the kernel is a 250 Hz kernel, then a
+* sample_rate_ms of 10 should result in a sample_time of 12.
+*
+* To Do: Check that the IO Boost case is not filtered too much.
+*It might be that a filter by-pass is needed for the boost 
case.
+*However, the existing gain = f(duration) might be good enough.
+*/
+
+   duration_ns = cpu->sample.time - cpu->last_sample_time;
+
+   scaled_gain = div_u64(int_tofp(duration_ns) *
+   

RE: [RFC][PATCH 7/7] cpufreq: intel_pstate: Change P-state selection algorithm for Core

2016-08-13 Thread Doug Smythies
On 2016.08.05 17:02 Rafael J. Wysocki wrote:
>> On 2016.08.03 21:19 Doug Smythies wrote:
>>> On 2016.07.31 16:49 Rafael J. Wysocki wrote:
>>> 
>>> The PID-based P-state selection algorithm used by intel_pstate for
>>> Core processors is based on very weak foundations.
>>
>> ...[cut]...
>> 
>>> +static inline int32_t get_target_pstate_default(struct cpudata *cpu)
>>> +{
>>> +   struct sample *sample = &cpu->sample;
>>> +   int32_t busy_frac;
>>> +   int pstate;
>>> +
>>> +   busy_frac = div_fp(sample->mperf, sample->tsc);
>>> +   sample->busy_scaled = busy_frac * 100;
>>> +
>>> +   if (busy_frac < cpu->iowait_boost)
>>> +   busy_frac = cpu->iowait_boost;
>>> +
>>> +   cpu->iowait_boost >>= 1;
>>> +
>>> +   pstate = cpu->pstate.turbo_pstate;
>>> +   return fp_toint((pstate + (pstate >> 2)) * busy_frac);
>>> +}
>>> +
>>
My previous replies (and see below) have suggested that some filtering
is needed on the target pstate; otherwise, depending on the type of
workload, it tends to oscillate.

I added the IIR (Infinite Impulse Response) filter that I have suggested in the 
past:

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index c43ef55..262ec5f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -98,6 +98,7 @@ static inline u64 div_ext_fp(u64 x, u64 y)
  * @tsc:   Difference of time stamp counter between last and
  * current sample
  * @time:  Current time from scheduler
+ * @target:target pstate filtered.
  *
  * This structure is used in the cpudata structure to store performance sample
  * data for choosing next P State.
@@ -108,6 +109,7 @@ struct sample {
u64 aperf;
u64 mperf;
u64 tsc;
+   u64 target;
u64 time;
 };

@@ -1168,6 +1170,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata 
*cpu)
pstate_funcs.get_vid(cpu);

intel_pstate_set_min_pstate(cpu);
+   cpu->sample.target = int_tofp(cpu->pstate.min_pstate);
 }

 static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
@@ -1301,8 +1304,10 @@ static inline int32_t 
get_target_pstate_use_performance(struct cpudata *cpu)
 static inline int32_t get_target_pstate_default(struct cpudata *cpu)
 {
struct sample *sample = &cpu->sample;
+   int64_t scaled_gain, unfiltered_target;
int32_t busy_frac;
int pstate;
+   u64 duration_ns;

busy_frac = div_fp(sample->mperf, sample->tsc);
sample->busy_scaled = busy_frac * 100;
@@ -1313,7 +1318,74 @@ static inline int32_t get_target_pstate_default(struct 
cpudata *cpu)
cpu->iowait_boost >>= 1;

pstate = cpu->pstate.turbo_pstate;
-   return fp_toint((pstate + (pstate >> 2)) * busy_frac);
+   /* To Do: I think the above should be:
+*
+* if (limits.no_turbo || limits.turbo_disabled)
+*  pstate = cpu->pstate.max_pstate;
+* else
+*  pstate = cpu->pstate.turbo_pstate;
+*
+* figure it out.
+*
+* no clamps. Pre-filter clamping was needed in past implementations.
+* To Do: Is any pre-filter clamping needed here? */
+
+   unfiltered_target = (pstate + (pstate >> 2)) * busy_frac;
+
+   /*
+* Idle check.
+* We have a deferrable timer. Very long durations can be
+* either due to long idle (C0 time near 0),
+* or due to short idle times that spanned jiffy boundaries
+* (C0 time not near zero).
+*
+* To Do: As of the utilization stuff, I do not think the
+* spanning jiffy boundaries thing is true anymore.
+* Check, and fix the comment.
+*
+* The very long durations are 0.4 seconds or more.
+* Either way, a very long duration will effectively flush
+* the IIR filter, otherwise falling edge load response times
+* can be on the order of tens of seconds, because this driver
+* runs very rarely. Furthermore, for higher periodic loads that
+* just so happen to not be in the C0 state on jiffy boundaries,
+* the long ago history should be forgotten.
+* For cases of durations that are a few times the set sample
+* period, increase the IIR filter gain so as to weight
+* the current sample more appropriately.
+*
+* To Do: sample_time should be forced to be accurate. For
+* example if the kernel is a 250 Hz kernel, then a
+* sample_rate_ms of 10 should result in a sample_time of 12.
+*
+* To Do: Check that the IO Boost case is not filtered too much.
+*It might be that a filter by-pass is needed for the boost 
case.
+*However, the existing gain = f(duration) might be good enough.
+*/
+
+   duration_ns = cpu->sample.time - cpu->last_sample_time;
+
+   scaled_gain = div_u64(int_tofp(duration_ns) *
+   

[PATCH] whci: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue replaces the deprecated
create_singlethread_workqueue.

The workqueue "workqueue" has multiple workitems which may require
ordering. Hence, a dedicated ordered workqueue has been used.
Since the workqueue is not being used on a memory reclaim path,
WQ_MEM_RECLAIM has not been set.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/usb/host/whci/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/whci/init.c b/drivers/usb/host/whci/init.c
index e363723..ad8eb57 100644
--- a/drivers/usb/host/whci/init.c
+++ b/drivers/usb/host/whci/init.c
@@ -65,7 +65,7 @@ int whc_init(struct whc *whc)
init_waitqueue_head(&whc->cmd_wq);
init_waitqueue_head(&whc->async_list_wq);
init_waitqueue_head(&whc->periodic_list_wq);
-   whc->workqueue = 
create_singlethread_workqueue(dev_name(&whc->umc->dev));
+   whc->workqueue = alloc_ordered_workqueue(dev_name(&whc->umc->dev), 0);
if (whc->workqueue == NULL) {
ret = -ENOMEM;
goto error;
--
2.1.4



[PATCH] whci: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
alloc_ordered_workqueue replaces the deprecated
create_singlethread_workqueue.

The workqueue "workqueue" has multiple workitems which may require
ordering. Hence, a dedicated ordered workqueue has been used.
Since the workqueue is not being used on a memory reclaim path,
WQ_MEM_RECLAIM has not been set.

Signed-off-by: Bhaktipriya Shridhar 
---
 drivers/usb/host/whci/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/whci/init.c b/drivers/usb/host/whci/init.c
index e363723..ad8eb57 100644
--- a/drivers/usb/host/whci/init.c
+++ b/drivers/usb/host/whci/init.c
@@ -65,7 +65,7 @@ int whc_init(struct whc *whc)
init_waitqueue_head(&whc->cmd_wq);
init_waitqueue_head(&whc->async_list_wq);
init_waitqueue_head(&whc->periodic_list_wq);
-   whc->workqueue = 
create_singlethread_workqueue(dev_name(&whc->umc->dev));
+   whc->workqueue = alloc_ordered_workqueue(dev_name(&whc->umc->dev), 0);
if (whc->workqueue == NULL) {
ret = -ENOMEM;
goto error;
--
2.1.4



Re: clocksource_watchdog causing scheduling of timers every second (was [v13] support "task_isolation" mode)

2016-08-13 Thread Frederic Weisbecker
On Fri, Aug 12, 2016 at 09:19:19AM -0700, Paul E. McKenney wrote:
> On Fri, Aug 12, 2016 at 04:26:13PM +0200, Frederic Weisbecker wrote:
> > On Fri, Aug 12, 2016 at 09:23:13AM -0500, Christoph Lameter wrote:
> > > On Thu, 11 Aug 2016, Paul E. McKenney wrote:
> > > 
> > > > Heh!  The only really good idea is for clocks to be reliably in sync.
> > > >
> > > > But if they go out of sync, what do you want to do instead?
> > > 
> > > For a NOHZ task? Write a message to the syslog and reenable tick.
> 
> Fair enough!  Kicking off a low-priority task would achieve the latter
> but not necessarily the former.  And of course assumes that the worker
> thread is at real-time priority with various scheduler anti-starvation
> features disabled.
> 
> > Indeed, a strong clocksource is a requirement for a full tickless machine.
> 
> No disagreement here!  ;-)

I have a bot in my mind that randomly posts obvious statements about nohz_full
here and then :-)


Re: clocksource_watchdog causing scheduling of timers every second (was [v13] support "task_isolation" mode)

2016-08-13 Thread Frederic Weisbecker
On Fri, Aug 12, 2016 at 09:19:19AM -0700, Paul E. McKenney wrote:
> On Fri, Aug 12, 2016 at 04:26:13PM +0200, Frederic Weisbecker wrote:
> > On Fri, Aug 12, 2016 at 09:23:13AM -0500, Christoph Lameter wrote:
> > > On Thu, 11 Aug 2016, Paul E. McKenney wrote:
> > > 
> > > > Heh!  The only really good idea is for clocks to be reliably in sync.
> > > >
> > > > But if they go out of sync, what do you want to do instead?
> > > 
> > > For a NOHZ task? Write a message to the syslog and reenable tick.
> 
> Fair enough!  Kicking off a low-priority task would achieve the latter
> but not necessarily the former.  And of course assumes that the worker
> thread is at real-time priority with various scheduler anti-starvation
> features disabled.
> 
> > Indeed, a strong clocksource is a requirement for a full tickless machine.
> 
> No disagreement here!  ;-)

I have a bot in my mind that randomly posts obvious statements about nohz_full
here and then :-)


[PATCH v2] cfag12864b: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
The workqueue has a single workitem(&cfag12864b_work) and hence doesn't
require ordering. Also, it is not being used on a memory reclaim path.
Hence, the singlethreaded workqueue has been replaced with the use of
system_wq.

System workqueues have been able to handle high level of concurrency
for a long time now and hence it's not required to have a singlethreaded
workqueue just to gain concurrency. Unlike a dedicated per-cpu workqueue
created with create_singlethread_workqueue(), system_wq allows multiple
work items to overlap executions even on the same CPU; however, a
per-cpu workqueue doesn't have any CPU locality or global ordering
guarantee unless the target CPU is explicitly specified and thus the
increase of local concurrency shouldn't make any difference.

Work item has been sync cancelled in cfag12864b_disable() to ensure that
there are no pending tasks while disconnecting the driver.

Signed-off-by: Bhaktipriya Shridhar 
---
 Changes in v2:
 -Used cancel_delayed_work_sync instead of cancel_delayed_work
 to ensure that the work item is finished.

 drivers/auxdisplay/cfag12864b.c | 14 ++
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/auxdisplay/cfag12864b.c b/drivers/auxdisplay/cfag12864b.c
index 41ce4bd..551b902 100644
--- a/drivers/auxdisplay/cfag12864b.c
+++ b/drivers/auxdisplay/cfag12864b.c
@@ -223,12 +223,11 @@ static unsigned char *cfag12864b_cache;
 static DEFINE_MUTEX(cfag12864b_mutex);
 static unsigned char cfag12864b_updating;
 static void cfag12864b_update(struct work_struct *delayed_work);
-static struct workqueue_struct *cfag12864b_workqueue;
 static DECLARE_DELAYED_WORK(cfag12864b_work, cfag12864b_update);

 static void cfag12864b_queue(void)
 {
-   queue_delayed_work(cfag12864b_workqueue, &cfag12864b_work,
+   schedule_delayed_work(&cfag12864b_work,
HZ / cfag12864b_rate);
 }

@@ -256,8 +255,7 @@ void cfag12864b_disable(void)

if (cfag12864b_updating) {
cfag12864b_updating = 0;
-   cancel_delayed_work(&cfag12864b_work);
-   flush_workqueue(cfag12864b_workqueue);
+   cancel_delayed_work_sync(&cfag12864b_work);
}

mutex_unlock(&cfag12864b_mutex);
@@ -357,19 +355,12 @@ static int __init cfag12864b_init(void)
goto bufferalloced;
}

-   cfag12864b_workqueue = create_singlethread_workqueue(CFAG12864B_NAME);
-   if (cfag12864b_workqueue == NULL)
-   goto cachealloced;
-
cfag12864b_clear();
cfag12864b_on();

cfag12864b_inited = 1;
return 0;

-cachealloced:
-   kfree(cfag12864b_cache);
-
 bufferalloced:
free_page((unsigned long) cfag12864b_buffer);

@@ -381,7 +372,6 @@ static void __exit cfag12864b_exit(void)
 {
cfag12864b_disable();
cfag12864b_off();
-   destroy_workqueue(cfag12864b_workqueue);
kfree(cfag12864b_cache);
free_page((unsigned long) cfag12864b_buffer);
 }
--
2.1.4



[PATCH v2] cfag12864b: Remove deprecated create_singlethread_workqueue

2016-08-13 Thread Bhaktipriya Shridhar
The workqueue has a single workitem(&cfag12864b_work) and hence doesn't
require ordering. Also, it is not being used on a memory reclaim path.
Hence, the singlethreaded workqueue has been replaced with the use of
system_wq.

System workqueues have been able to handle high level of concurrency
for a long time now and hence it's not required to have a singlethreaded
workqueue just to gain concurrency. Unlike a dedicated per-cpu workqueue
created with create_singlethread_workqueue(), system_wq allows multiple
work items to overlap executions even on the same CPU; however, a
per-cpu workqueue doesn't have any CPU locality or global ordering
guarantee unless the target CPU is explicitly specified and thus the
increase of local concurrency shouldn't make any difference.

Work item has been sync cancelled in cfag12864b_disable() to ensure that
there are no pending tasks while disconnecting the driver.

Signed-off-by: Bhaktipriya Shridhar 
---
 Changes in v2:
 -Used cancel_delayed_work_sync instead of cancel_delayed_work
 to ensure that the work item is finished.

 drivers/auxdisplay/cfag12864b.c | 14 ++
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/auxdisplay/cfag12864b.c b/drivers/auxdisplay/cfag12864b.c
index 41ce4bd..551b902 100644
--- a/drivers/auxdisplay/cfag12864b.c
+++ b/drivers/auxdisplay/cfag12864b.c
@@ -223,12 +223,11 @@ static unsigned char *cfag12864b_cache;
 static DEFINE_MUTEX(cfag12864b_mutex);
 static unsigned char cfag12864b_updating;
 static void cfag12864b_update(struct work_struct *delayed_work);
-static struct workqueue_struct *cfag12864b_workqueue;
 static DECLARE_DELAYED_WORK(cfag12864b_work, cfag12864b_update);

 static void cfag12864b_queue(void)
 {
-   queue_delayed_work(cfag12864b_workqueue, &cfag12864b_work,
+   schedule_delayed_work(&cfag12864b_work,
HZ / cfag12864b_rate);
 }

@@ -256,8 +255,7 @@ void cfag12864b_disable(void)

if (cfag12864b_updating) {
cfag12864b_updating = 0;
-   cancel_delayed_work(&cfag12864b_work);
-   flush_workqueue(cfag12864b_workqueue);
+   cancel_delayed_work_sync(&cfag12864b_work);
}

mutex_unlock(&cfag12864b_mutex);
@@ -357,19 +355,12 @@ static int __init cfag12864b_init(void)
goto bufferalloced;
}

-   cfag12864b_workqueue = create_singlethread_workqueue(CFAG12864B_NAME);
-   if (cfag12864b_workqueue == NULL)
-   goto cachealloced;
-
cfag12864b_clear();
cfag12864b_on();

cfag12864b_inited = 1;
return 0;

-cachealloced:
-   kfree(cfag12864b_cache);
-
 bufferalloced:
free_page((unsigned long) cfag12864b_buffer);

@@ -381,7 +372,6 @@ static void __exit cfag12864b_exit(void)
 {
cfag12864b_disable();
cfag12864b_off();
-   destroy_workqueue(cfag12864b_workqueue);
kfree(cfag12864b_cache);
free_page((unsigned long) cfag12864b_buffer);
 }
--
2.1.4



  1   2   >