[PATCH 3.2 65/94] IB/mlx4: Fix the SQ size of an RC QP
3.2.82-rc1 review patch. If anyone has any objections, please let me know. -- From: Yishai Hadas commit f2940e2c76bb554a7fbdd28ca5b90904117a9e96 upstream. When calculating the required size of an RC QP send queue, leave enough space for masked atomic operations, which require more space than "regular" atomic operation. Fixes: 6fa8f719844b ("IB/mlx4: Add support for masked atomic operations") Signed-off-by: Yishai Hadas Reviewed-by: Jack Morgenstein Reviewed-by: Eran Ben Elisha Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Ben Hutchings --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -283,7 +283,7 @@ static int send_wqe_overhead(enum ib_qp_ sizeof (struct mlx4_wqe_raddr_seg); case IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + - sizeof (struct mlx4_wqe_atomic_seg) + + sizeof (struct mlx4_wqe_masked_atomic_seg) + sizeof (struct mlx4_wqe_raddr_seg); case IB_QPT_SMI: case IB_QPT_GSI:
[PATCH 3.16 173/305] USB: quirks: Fix entries on wrong list in 3.16.y
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Ben Hutchings Commits ddbe1fca0bcb ("USB: Add device quirk for ASUS T100 Base Station keyboard") and e5dff0e80463 ("USB: Add OTG PET device to TPL") were wrongly backported to 3.16.y. The original commits added to usb_quirk_list but the backported versions added to usb_interface_quirk_list. Signed-off-by: Ben Hutchings --- --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -188,6 +188,14 @@ static const struct usb_device_id usb_qu /* USB3503 */ { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME }, + /* ASUS Base Station(T100) */ + { USB_DEVICE(0x0b05, 0x17e0), .driver_info = + USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + + /* Protocol and OTG Electrical Test Device */ + { USB_DEVICE(0x1a0a, 0x0200), .driver_info = + USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Blackmagic Design Intensity Shuttle */ { USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM }, @@ -202,14 +210,6 @@ static const struct usb_device_id usb_in { USB_VENDOR_AND_INTERFACE_INFO(0x046d, USB_CLASS_VIDEO, 1, 0), .driver_info = USB_QUIRK_RESET_RESUME }, - /* ASUS Base Station(T100) */ - { USB_DEVICE(0x0b05, 0x17e0), .driver_info = - USB_QUIRK_IGNORE_REMOTE_WAKEUP }, - - /* Protocol and OTG Electrical Test Device */ - { USB_DEVICE(0x1a0a, 0x0200), .driver_info = - USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, - { } /* terminating entry must be last */ };
[PATCH 3.2 65/94] IB/mlx4: Fix the SQ size of an RC QP
3.2.82-rc1 review patch. If anyone has any objections, please let me know. -- From: Yishai Hadas commit f2940e2c76bb554a7fbdd28ca5b90904117a9e96 upstream. When calculating the required size of an RC QP send queue, leave enough space for masked atomic operations, which require more space than "regular" atomic operation. Fixes: 6fa8f719844b ("IB/mlx4: Add support for masked atomic operations") Signed-off-by: Yishai Hadas Reviewed-by: Jack Morgenstein Reviewed-by: Eran Ben Elisha Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Ben Hutchings --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -283,7 +283,7 @@ static int send_wqe_overhead(enum ib_qp_ sizeof (struct mlx4_wqe_raddr_seg); case IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + - sizeof (struct mlx4_wqe_atomic_seg) + + sizeof (struct mlx4_wqe_masked_atomic_seg) + sizeof (struct mlx4_wqe_raddr_seg); case IB_QPT_SMI: case IB_QPT_GSI:
[PATCH 3.16 173/305] USB: quirks: Fix entries on wrong list in 3.16.y
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Ben Hutchings Commits ddbe1fca0bcb ("USB: Add device quirk for ASUS T100 Base Station keyboard") and e5dff0e80463 ("USB: Add OTG PET device to TPL") were wrongly backported to 3.16.y. The original commits added to usb_quirk_list but the backported versions added to usb_interface_quirk_list. Signed-off-by: Ben Hutchings --- --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -188,6 +188,14 @@ static const struct usb_device_id usb_qu /* USB3503 */ { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME }, + /* ASUS Base Station(T100) */ + { USB_DEVICE(0x0b05, 0x17e0), .driver_info = + USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + + /* Protocol and OTG Electrical Test Device */ + { USB_DEVICE(0x1a0a, 0x0200), .driver_info = + USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Blackmagic Design Intensity Shuttle */ { USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM }, @@ -202,14 +210,6 @@ static const struct usb_device_id usb_in { USB_VENDOR_AND_INTERFACE_INFO(0x046d, USB_CLASS_VIDEO, 1, 0), .driver_info = USB_QUIRK_RESET_RESUME }, - /* ASUS Base Station(T100) */ - { USB_DEVICE(0x0b05, 0x17e0), .driver_info = - USB_QUIRK_IGNORE_REMOTE_WAKEUP }, - - /* Protocol and OTG Electrical Test Device */ - { USB_DEVICE(0x1a0a, 0x0200), .driver_info = - USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, - { } /* terminating entry must be last */ };
[PATCH 3.16 078/305] ring-buffer: Prevent overflow of size in ring_buffer_resize()
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: "Steven Rostedt (Red Hat)" commit 59643d1535eb220668692a5359de22545af579f6 upstream. If the size passed to ring_buffer_resize() is greater than MAX_LONG - BUF_PAGE_SIZE then the DIV_ROUND_UP() will return zero. Here's the details: # echo 18014398509481980 > /sys/kernel/debug/tracing/buffer_size_kb tracing_entries_write() processes this and converts kb to bytes. 18014398509481980 << 10 = 18446744073709547520 and this is passed to ring_buffer_resize() as unsigned long size. size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); Where DIV_ROUND_UP(a, b) is (a + b - 1)/b BUF_PAGE_SIZE is 4080 and here 18446744073709547520 + 4080 - 1 = 18446744073709551599 where 18446744073709551599 is still smaller than 2^64 2^64 - 18446744073709551599 = 17 But now 18446744073709551599 / 4080 = 4521260802379792 and size = size * 4080 = 18446744073709551360 This is checked to make sure its still greater than 2 * 4080, which it is. Then we convert to the number of buffer pages needed. nr_page = DIV_ROUND_UP(size, BUF_PAGE_SIZE) but this time size is 18446744073709551360 and 2^64 - (18446744073709551360 + 4080 - 1) = -3823 Thus it overflows and the resulting number is less than 4080, which makes 3823 / 4080 = 0 an nr_pages is set to this. As we already checked against the minimum that nr_pages may be, this causes the logic to fail as well, and we crash the kernel. There's no reason to have the two DIV_ROUND_UP() (that's just result of historical code changes), clean up the code and fix this bug. 
Fixes: 83f40318dab00 ("ring-buffer: Make removal of ring buffer pages atomic") Signed-off-by: Steven Rostedt Signed-off-by: Ben Hutchings --- kernel/trace/ring_buffer.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1694,14 +1694,13 @@ int ring_buffer_resize(struct ring_buffe !cpumask_test_cpu(cpu_id, buffer->cpumask)) return size; - size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); - size *= BUF_PAGE_SIZE; + nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); /* we need a minimum of two pages */ - if (size < BUF_PAGE_SIZE * 2) - size = BUF_PAGE_SIZE * 2; + if (nr_pages < 2) + nr_pages = 2; - nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); + size = nr_pages * BUF_PAGE_SIZE; /* * Don't succeed if resizing is disabled, as a reader might be
[PATCH 3.16 277/305] ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Kangjie Lu commit cec8f96e49d9be372fdb0c3836dcf31ec71e457e upstream. The stack object “tread” has a total size of 32 bytes. Its field “event” and “val” both contain 4 bytes padding. These 8 bytes padding bytes are sent to user without being initialized. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Signed-off-by: Ben Hutchings --- sound/core/timer.c | 1 + 1 file changed, 1 insertion(+) --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1750,6 +1750,7 @@ static int snd_timer_user_params(struct if (tu->timeri->flags & SNDRV_TIMER_IFLG_EARLY_EVENT) { if (tu->tread) { struct snd_timer_tread tread; + memset(&tread, 0, sizeof(tread)); tread.event = SNDRV_TIMER_EVENT_EARLY; tread.tstamp.tv_sec = 0; tread.tstamp.tv_nsec = 0;
[PATCH 3.16 053/305] MIPS: Don't unwind to user mode with EVA
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: James Hogan commit a816b306c62195b7c43c92cb13330821a96bdc27 upstream. When unwinding through IRQs and exceptions, the unwinding only continues if the PC is a kernel text address, however since EVA it is possible for user and kernel address ranges to overlap, potentially allowing unwinding to continue to user mode if the user PC happens to be in the kernel text address range. Adjust the check to also ensure that the register state from before the exception is actually running in kernel mode, i.e. !user_mode(regs). I don't believe any harm can come of this problem, since the PC is only output, the stack pointer is checked to ensure it resides within the task's stack page before it is dereferenced in search of the return address, and the return address register is similarly only output (if the PC is in a leaf function or the beginning of a non-leaf function). However unwind_stack() is only meant for unwinding kernel code, so to be correct the unwind should stop there. Signed-off-by: James Hogan Reviewed-by: Leonid Yegoshin Cc: linux-m...@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11700/ Signed-off-by: Ralf Baechle Signed-off-by: Ben Hutchings --- arch/mips/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -489,7 +489,7 @@ unsigned long notrace unwind_stack_by_ad *sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) { regs = (struct pt_regs *)*sp; pc = regs->cp0_epc; - if (__kernel_text_address(pc)) { + if (!user_mode(regs) && __kernel_text_address(pc)) { *sp = regs->regs[29]; *ra = regs->regs[31]; return pc;
[PATCH 3.16 078/305] ring-buffer: Prevent overflow of size in ring_buffer_resize()
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: "Steven Rostedt (Red Hat)" commit 59643d1535eb220668692a5359de22545af579f6 upstream. If the size passed to ring_buffer_resize() is greater than MAX_LONG - BUF_PAGE_SIZE then the DIV_ROUND_UP() will return zero. Here's the details: # echo 18014398509481980 > /sys/kernel/debug/tracing/buffer_size_kb tracing_entries_write() processes this and converts kb to bytes. 18014398509481980 << 10 = 18446744073709547520 and this is passed to ring_buffer_resize() as unsigned long size. size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); Where DIV_ROUND_UP(a, b) is (a + b - 1)/b BUF_PAGE_SIZE is 4080 and here 18446744073709547520 + 4080 - 1 = 18446744073709551599 where 18446744073709551599 is still smaller than 2^64 2^64 - 18446744073709551599 = 17 But now 18446744073709551599 / 4080 = 4521260802379792 and size = size * 4080 = 18446744073709551360 This is checked to make sure its still greater than 2 * 4080, which it is. Then we convert to the number of buffer pages needed. nr_page = DIV_ROUND_UP(size, BUF_PAGE_SIZE) but this time size is 18446744073709551360 and 2^64 - (18446744073709551360 + 4080 - 1) = -3823 Thus it overflows and the resulting number is less than 4080, which makes 3823 / 4080 = 0 an nr_pages is set to this. As we already checked against the minimum that nr_pages may be, this causes the logic to fail as well, and we crash the kernel. There's no reason to have the two DIV_ROUND_UP() (that's just result of historical code changes), clean up the code and fix this bug. 
Fixes: 83f40318dab00 ("ring-buffer: Make removal of ring buffer pages atomic") Signed-off-by: Steven Rostedt Signed-off-by: Ben Hutchings --- kernel/trace/ring_buffer.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1694,14 +1694,13 @@ int ring_buffer_resize(struct ring_buffe !cpumask_test_cpu(cpu_id, buffer->cpumask)) return size; - size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); - size *= BUF_PAGE_SIZE; + nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); /* we need a minimum of two pages */ - if (size < BUF_PAGE_SIZE * 2) - size = BUF_PAGE_SIZE * 2; + if (nr_pages < 2) + nr_pages = 2; - nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); + size = nr_pages * BUF_PAGE_SIZE; /* * Don't succeed if resizing is disabled, as a reader might be
[PATCH 3.16 277/305] ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Kangjie Lu commit cec8f96e49d9be372fdb0c3836dcf31ec71e457e upstream. The stack object “tread” has a total size of 32 bytes. Its field “event” and “val” both contain 4 bytes padding. These 8 bytes padding bytes are sent to user without being initialized. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Signed-off-by: Ben Hutchings --- sound/core/timer.c | 1 + 1 file changed, 1 insertion(+) --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1750,6 +1750,7 @@ static int snd_timer_user_params(struct if (tu->timeri->flags & SNDRV_TIMER_IFLG_EARLY_EVENT) { if (tu->tread) { struct snd_timer_tread tread; + memset(&tread, 0, sizeof(tread)); tread.event = SNDRV_TIMER_EVENT_EARLY; tread.tstamp.tv_sec = 0; tread.tstamp.tv_nsec = 0;
[PATCH 3.16 053/305] MIPS: Don't unwind to user mode with EVA
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: James Hogan commit a816b306c62195b7c43c92cb13330821a96bdc27 upstream. When unwinding through IRQs and exceptions, the unwinding only continues if the PC is a kernel text address, however since EVA it is possible for user and kernel address ranges to overlap, potentially allowing unwinding to continue to user mode if the user PC happens to be in the kernel text address range. Adjust the check to also ensure that the register state from before the exception is actually running in kernel mode, i.e. !user_mode(regs). I don't believe any harm can come of this problem, since the PC is only output, the stack pointer is checked to ensure it resides within the task's stack page before it is dereferenced in search of the return address, and the return address register is similarly only output (if the PC is in a leaf function or the beginning of a non-leaf function). However unwind_stack() is only meant for unwinding kernel code, so to be correct the unwind should stop there. Signed-off-by: James Hogan Reviewed-by: Leonid Yegoshin Cc: linux-m...@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11700/ Signed-off-by: Ralf Baechle Signed-off-by: Ben Hutchings --- arch/mips/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -489,7 +489,7 @@ unsigned long notrace unwind_stack_by_ad *sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) { regs = (struct pt_regs *)*sp; pc = regs->cp0_epc; - if (__kernel_text_address(pc)) { + if (!user_mode(regs) && __kernel_text_address(pc)) { *sp = regs->regs[29]; *ra = regs->regs[31]; return pc;
[PATCH 3.2 29/94] sunrpc: Update RPCBIND_MAXNETIDLEN
3.2.82-rc1 review patch. If anyone has any objections, please let me know. -- From: Chuck Lever commit 4b9c7f9db9a003f5c342184dc4401c1b7f2efb39 upstream. Commit 176e21ee2ec8 ("SUNRPC: Support for RPC over AF_LOCAL transports") added a 5-character netid, but did not bump RPCBIND_MAXNETIDLEN from 4 to 5. Fixes: 176e21ee2ec8 ("SUNRPC: Support for RPC over AF_LOCAL ...") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Ben Hutchings --- include/linux/sunrpc/msg_prot.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -149,9 +149,9 @@ typedef __be32 rpc_fraghdr; /* * Note that RFC 1833 does not put any size restrictions on the - * netid string, but all currently defined netid's fit in 4 bytes. + * netid string, but all currently defined netid's fit in 5 bytes. */ -#define RPCBIND_MAXNETIDLEN (4u) +#define RPCBIND_MAXNETIDLEN (5u) /* * Universal addresses are introduced in RFC 1833 and further spelled
[PATCH 3.16 281/305] rds: fix an infoleak in rds_inc_info_copy
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Kangjie Lu commit 4116def2337991b39919f3b448326e21c40e0dbb upstream. The last field "flags" of object "minfo" is not initialized. Copying this object out may leak kernel stack data. Assign 0 to it to avoid leak. Signed-off-by: Kangjie Lu Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings --- net/rds/recv.c | 2 ++ 1 file changed, 2 insertions(+) --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -543,5 +543,7 @@ void rds_inc_info_copy(struct rds_incomi minfo.fport = inc->i_hdr.h_dport; } + minfo.flags = 0; + rds_info_copy(iter, &minfo, sizeof(minfo)); }
[PATCH 3.2 29/94] sunrpc: Update RPCBIND_MAXNETIDLEN
3.2.82-rc1 review patch. If anyone has any objections, please let me know. -- From: Chuck Lever commit 4b9c7f9db9a003f5c342184dc4401c1b7f2efb39 upstream. Commit 176e21ee2ec8 ("SUNRPC: Support for RPC over AF_LOCAL transports") added a 5-character netid, but did not bump RPCBIND_MAXNETIDLEN from 4 to 5. Fixes: 176e21ee2ec8 ("SUNRPC: Support for RPC over AF_LOCAL ...") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Ben Hutchings --- include/linux/sunrpc/msg_prot.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -149,9 +149,9 @@ typedef __be32 rpc_fraghdr; /* * Note that RFC 1833 does not put any size restrictions on the - * netid string, but all currently defined netid's fit in 4 bytes. + * netid string, but all currently defined netid's fit in 5 bytes. */ -#define RPCBIND_MAXNETIDLEN (4u) +#define RPCBIND_MAXNETIDLEN (5u) /* * Universal addresses are introduced in RFC 1833 and further spelled
[PATCH 3.16 281/305] rds: fix an infoleak in rds_inc_info_copy
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Kangjie Lu commit 4116def2337991b39919f3b448326e21c40e0dbb upstream. The last field "flags" of object "minfo" is not initialized. Copying this object out may leak kernel stack data. Assign 0 to it to avoid leak. Signed-off-by: Kangjie Lu Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings --- net/rds/recv.c | 2 ++ 1 file changed, 2 insertions(+) --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -543,5 +543,7 @@ void rds_inc_info_copy(struct rds_incomi minfo.fport = inc->i_hdr.h_dport; } + minfo.flags = 0; + rds_info_copy(iter, &minfo, sizeof(minfo)); }
[PATCH 3.16 005/305] ath5k: Change led pin configuration for compaq c700 laptop
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Joseph Salisbury commit 7b9bc799a445aea95f64f15e0083cb19b5789abe upstream. BugLink: http://bugs.launchpad.net/bugs/972604 Commit 09c9bae26b0d3c9472cb6ae45010460a2cee8b8d ("ath5k: add led pin configuration for compaq c700 laptop") added a pin configuration for the Compaq c700 laptop. However, the polarity of the led pin is reversed. It should be red for wifi off and blue for wifi on, but it is the opposite. This bug was reported in the following bug report: http://pad.lv/972604 Fixes: 09c9bae26b0d3c9472cb6ae45010460a2cee8b8d ("ath5k: add led pin configuration for compaq c700 laptop") Signed-off-by: Joseph Salisbury Signed-off-by: Kalle Valo Signed-off-by: Ben Hutchings --- drivers/net/wireless/ath/ath5k/led.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/net/wireless/ath/ath5k/led.c +++ b/drivers/net/wireless/ath/ath5k/led.c @@ -77,7 +77,7 @@ static DEFINE_PCI_DEVICE_TABLE(ath5k_led /* HP Compaq CQ60-206US (ddregg...@jumptv.com) */ { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137a), ATH_LED(3, 1) }, /* HP Compaq C700 (nitrous...@gmail.com) */ - { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 1) }, + { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 0) }, /* LiteOn AR5BXB63 (mag...@salug.it) */ { ATH_SDEVICE(PCI_VENDOR_ID_ATHEROS, 0x3067), ATH_LED(3, 0) }, /* IBM-specific AR5212 (all others) */
[PATCH 3.16 201/305] hwmon: (dell-smm) Restrict fan control and serial number to CAP_SYS_ADMIN by default
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Pali Rohár commit 7613663cc186f8f3c50279390ddc60286758001c upstream. For security reasons ordinary user must not be able to control fan speed via /proc/i8k by default. Some malicious software running under "nobody" user could be able to turn fan off and cause HW problems. So this patch changes default value of "restricted" parameter to 1. Also restrict reading of DMI_PRODUCT_SERIAL from /proc/i8k via "restricted" parameter. It is because non root user cannot read DMI_PRODUCT_SERIAL from sysfs file /sys/class/dmi/id/product_serial. Old non secure behaviour of file /proc/i8k can be achieved by loading this module with "restricted" parameter set to 0. Note that this patch has effects only for kernels compiled with CONFIG_I8K and only for file /proc/i8k. Hwmon interface provided by this driver was not changed and root access for setting fan speed was needed also before. Reported-by: Mario Limonciello Signed-off-by: Pali Rohár Signed-off-by: Guenter Roeck [bwh: Backported to 3.16: adjust filename, context] Signed-off-by: Ben Hutchings --- drivers/char/i8k.c | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -62,6 +62,7 @@ static DEFINE_MUTEX(i8k_mutex); static char bios_version[4]; +static char bios_machineid[16]; static struct device *i8k_hwmon_dev; static u32 i8k_hwmon_flags; static int i8k_fan_mult; @@ -85,13 +86,13 @@ static bool ignore_dmi; module_param(ignore_dmi, bool, 0); MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match"); -static bool restricted; +static bool restricted = true; module_param(restricted, bool, 0); -MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set"); +MODULE_PARM_DESC(restricted, "Restrict fan control and serial number to CAP_SYS_ADMIN (default: 1)"); static bool power_status; module_param(power_status, bool, 0600); 
-MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k"); +MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k (default: 0)"); static int fan_mult = I8K_FAN_MULT; module_param(fan_mult, int, 0); @@ -350,9 +351,11 @@ i8k_ioctl_unlocked(struct file *fp, unsi break; case I8K_MACHINE_ID: - memset(buff, 0, 16); - strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL), - sizeof(buff)); + if (restricted && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + memset(buff, 0, sizeof(buff)); + strlcpy(buff, bios_machineid, sizeof(buff)); break; case I8K_FN_STATUS: @@ -469,7 +472,7 @@ static int i8k_proc_show(struct seq_file return seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n", I8K_PROC_FMT, bios_version, - i8k_get_dmi_data(DMI_PRODUCT_SERIAL), + (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : bios_machineid, cpu_temp, left_fan, right_fan, left_speed, right_speed, ac_power, fn_key); @@ -765,6 +768,8 @@ static int __init i8k_probe(void) strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION), sizeof(bios_version)); + strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL), + sizeof(bios_machineid)); /* * Get SMM Dell signature
[PATCH 3.16 224/305] ALSA: dummy: Fix a use-after-free at closing
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Takashi Iwai commit d5dbbe6569481bf12dcbe3e12cff72c5f78d272c upstream. syzkaller fuzzer spotted a potential use-after-free case in snd-dummy driver when hrtimer is used as backend: > == > BUG: KASAN: use-after-free in rb_erase+0x1b17/0x2010 at addr 88005e5b6f68 > Read of size 8 by task syz-executor/8984 > = > BUG kmalloc-192 (Not tainted): kasan: bad access detected > - > > Disabling lock debugging due to kernel taint > INFO: Allocated in 0x age=18446705582212484632 > > [< none >] dummy_hrtimer_create+0x49/0x1a0 sound/drivers/dummy.c:464 > > INFO: Freed in 0xfffd8e09 age=18446705496313138713 cpu=2164287125 pid=-1 > [< none >] dummy_hrtimer_free+0x68/0x80 sound/drivers/dummy.c:481 > > Call Trace: > [] __asan_report_load8_noabort+0x3e/0x40 > mm/kasan/report.c:333 > [< inline >] rb_set_parent include/linux/rbtree_augmented.h:111 > [< inline >] __rb_erase_augmented > include/linux/rbtree_augmented.h:218 > [] rb_erase+0x1b17/0x2010 lib/rbtree.c:427 > [] timerqueue_del+0x78/0x170 lib/timerqueue.c:86 > [] __remove_hrtimer+0x90/0x220 kernel/time/hrtimer.c:903 > [< inline >] remove_hrtimer kernel/time/hrtimer.c:945 > [] hrtimer_try_to_cancel+0x22a/0x570 > kernel/time/hrtimer.c:1046 > [] hrtimer_cancel+0x22/0x40 kernel/time/hrtimer.c:1066 > [] dummy_hrtimer_stop+0x91/0xb0 sound/drivers/dummy.c:417 > [] dummy_pcm_trigger+0x17f/0x1e0 sound/drivers/dummy.c:507 > [] snd_pcm_do_stop+0x160/0x1b0 sound/core/pcm_native.c:1106 > [] snd_pcm_action_single+0x76/0x120 > sound/core/pcm_native.c:956 > [] snd_pcm_action+0x231/0x290 sound/core/pcm_native.c:974 > [< inline >] snd_pcm_stop sound/core/pcm_native.c:1139 > [] snd_pcm_drop+0x12d/0x1d0 sound/core/pcm_native.c:1784 > [] snd_pcm_common_ioctl1+0xfae/0x2150 > sound/core/pcm_native.c:2805 > [] snd_pcm_capture_ioctl1+0x2a1/0x5e0 > sound/core/pcm_native.c:2976 > [] snd_pcm_kernel_ioctl+0x11c/0x160 > sound/core/pcm_native.c:3020 > [] 
snd_pcm_oss_sync+0x3a4/0xa30 > sound/core/oss/pcm_oss.c:1693 > [] snd_pcm_oss_release+0x1ad/0x280 > sound/core/oss/pcm_oss.c:2483 > . A workaround is to call hrtimer_cancel() in dummy_hrtimer_sync() which is called certainly before other blocking ops. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Ben Hutchings --- sound/drivers/dummy.c | 1 + 1 file changed, 1 insertion(+) --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -422,6 +422,7 @@ static int dummy_hrtimer_stop(struct snd static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm) { + hrtimer_cancel(&dpcm->timer); tasklet_kill(&dpcm->tasklet); }
[PATCH 3.16 005/305] ath5k: Change led pin configuration for compaq c700 laptop
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Joseph Salisbury commit 7b9bc799a445aea95f64f15e0083cb19b5789abe upstream. BugLink: http://bugs.launchpad.net/bugs/972604 Commit 09c9bae26b0d3c9472cb6ae45010460a2cee8b8d ("ath5k: add led pin configuration for compaq c700 laptop") added a pin configuration for the Compaq c700 laptop. However, the polarity of the led pin is reversed. It should be red for wifi off and blue for wifi on, but it is the opposite. This bug was reported in the following bug report: http://pad.lv/972604 Fixes: 09c9bae26b0d3c9472cb6ae45010460a2cee8b8d ("ath5k: add led pin configuration for compaq c700 laptop") Signed-off-by: Joseph Salisbury Signed-off-by: Kalle Valo Signed-off-by: Ben Hutchings --- drivers/net/wireless/ath/ath5k/led.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/net/wireless/ath/ath5k/led.c +++ b/drivers/net/wireless/ath/ath5k/led.c @@ -77,7 +77,7 @@ static DEFINE_PCI_DEVICE_TABLE(ath5k_led /* HP Compaq CQ60-206US (ddregg...@jumptv.com) */ { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137a), ATH_LED(3, 1) }, /* HP Compaq C700 (nitrous...@gmail.com) */ - { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 1) }, + { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 0) }, /* LiteOn AR5BXB63 (mag...@salug.it) */ { ATH_SDEVICE(PCI_VENDOR_ID_ATHEROS, 0x3067), ATH_LED(3, 0) }, /* IBM-specific AR5212 (all others) */
[PATCH 3.16 201/305] hwmon: (dell-smm) Restrict fan control and serial number to CAP_SYS_ADMIN by default
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Pali Rohár commit 7613663cc186f8f3c50279390ddc60286758001c upstream. For security reasons ordinary user must not be able to control fan speed via /proc/i8k by default. Some malicious software running under "nobody" user could be able to turn fan off and cause HW problems. So this patch changes default value of "restricted" parameter to 1. Also restrict reading of DMI_PRODUCT_SERIAL from /proc/i8k via "restricted" parameter. It is because non root user cannot read DMI_PRODUCT_SERIAL from sysfs file /sys/class/dmi/id/product_serial. Old non secure behaviour of file /proc/i8k can be achieved by loading this module with "restricted" parameter set to 0. Note that this patch has effects only for kernels compiled with CONFIG_I8K and only for file /proc/i8k. Hwmon interface provided by this driver was not changed and root access for setting fan speed was needed also before. Reported-by: Mario Limonciello Signed-off-by: Pali Rohár Signed-off-by: Guenter Roeck [bwh: Backported to 3.16: adjust filename, context] Signed-off-by: Ben Hutchings --- drivers/char/i8k.c | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -62,6 +62,7 @@ static DEFINE_MUTEX(i8k_mutex); static char bios_version[4]; +static char bios_machineid[16]; static struct device *i8k_hwmon_dev; static u32 i8k_hwmon_flags; static int i8k_fan_mult; @@ -85,13 +86,13 @@ static bool ignore_dmi; module_param(ignore_dmi, bool, 0); MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match"); -static bool restricted; +static bool restricted = true; module_param(restricted, bool, 0); -MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set"); +MODULE_PARM_DESC(restricted, "Restrict fan control and serial number to CAP_SYS_ADMIN (default: 1)"); static bool power_status; module_param(power_status, bool, 0600); 
-MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k"); +MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k (default: 0)"); static int fan_mult = I8K_FAN_MULT; module_param(fan_mult, int, 0); @@ -350,9 +351,11 @@ i8k_ioctl_unlocked(struct file *fp, unsi break; case I8K_MACHINE_ID: - memset(buff, 0, 16); - strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL), - sizeof(buff)); + if (restricted && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + memset(buff, 0, sizeof(buff)); + strlcpy(buff, bios_machineid, sizeof(buff)); break; case I8K_FN_STATUS: @@ -469,7 +472,7 @@ static int i8k_proc_show(struct seq_file return seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n", I8K_PROC_FMT, bios_version, - i8k_get_dmi_data(DMI_PRODUCT_SERIAL), + (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : bios_machineid, cpu_temp, left_fan, right_fan, left_speed, right_speed, ac_power, fn_key); @@ -765,6 +768,8 @@ static int __init i8k_probe(void) strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION), sizeof(bios_version)); + strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL), + sizeof(bios_machineid)); /* * Get SMM Dell signature
[PATCH 3.16 224/305] ALSA: dummy: Fix a use-after-free at closing
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Takashi Iwai commit d5dbbe6569481bf12dcbe3e12cff72c5f78d272c upstream. syzkaller fuzzer spotted a potential use-after-free case in snd-dummy driver when hrtimer is used as backend: > == > BUG: KASAN: use-after-free in rb_erase+0x1b17/0x2010 at addr 88005e5b6f68 > Read of size 8 by task syz-executor/8984 > = > BUG kmalloc-192 (Not tainted): kasan: bad access detected > - > > Disabling lock debugging due to kernel taint > INFO: Allocated in 0x age=18446705582212484632 > > [< none >] dummy_hrtimer_create+0x49/0x1a0 sound/drivers/dummy.c:464 > > INFO: Freed in 0xfffd8e09 age=18446705496313138713 cpu=2164287125 pid=-1 > [< none >] dummy_hrtimer_free+0x68/0x80 sound/drivers/dummy.c:481 > > Call Trace: > [] __asan_report_load8_noabort+0x3e/0x40 > mm/kasan/report.c:333 > [< inline >] rb_set_parent include/linux/rbtree_augmented.h:111 > [< inline >] __rb_erase_augmented > include/linux/rbtree_augmented.h:218 > [] rb_erase+0x1b17/0x2010 lib/rbtree.c:427 > [] timerqueue_del+0x78/0x170 lib/timerqueue.c:86 > [] __remove_hrtimer+0x90/0x220 kernel/time/hrtimer.c:903 > [< inline >] remove_hrtimer kernel/time/hrtimer.c:945 > [] hrtimer_try_to_cancel+0x22a/0x570 > kernel/time/hrtimer.c:1046 > [] hrtimer_cancel+0x22/0x40 kernel/time/hrtimer.c:1066 > [] dummy_hrtimer_stop+0x91/0xb0 sound/drivers/dummy.c:417 > [] dummy_pcm_trigger+0x17f/0x1e0 sound/drivers/dummy.c:507 > [] snd_pcm_do_stop+0x160/0x1b0 sound/core/pcm_native.c:1106 > [] snd_pcm_action_single+0x76/0x120 > sound/core/pcm_native.c:956 > [] snd_pcm_action+0x231/0x290 sound/core/pcm_native.c:974 > [< inline >] snd_pcm_stop sound/core/pcm_native.c:1139 > [] snd_pcm_drop+0x12d/0x1d0 sound/core/pcm_native.c:1784 > [] snd_pcm_common_ioctl1+0xfae/0x2150 > sound/core/pcm_native.c:2805 > [] snd_pcm_capture_ioctl1+0x2a1/0x5e0 > sound/core/pcm_native.c:2976 > [] snd_pcm_kernel_ioctl+0x11c/0x160 > sound/core/pcm_native.c:3020 > [] 
snd_pcm_oss_sync+0x3a4/0xa30 > sound/core/oss/pcm_oss.c:1693 > [] snd_pcm_oss_release+0x1ad/0x280 > sound/core/oss/pcm_oss.c:2483 > . A workaround is to call hrtimer_cancel() in dummy_hrtimer_sync() which is called certainly before other blocking ops. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Ben Hutchings --- sound/drivers/dummy.c | 1 + 1 file changed, 1 insertion(+) --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -422,6 +422,7 @@ static int dummy_hrtimer_stop(struct snd static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm) { + hrtimer_cancel(&dpcm->timer); tasklet_kill(&dpcm->tasklet); }
[PATCH 3.2 05/94] alpha/PCI: Call iomem_is_exclusive() for IORESOURCE_MEM, but not IORESOURCE_IO
3.2.82-rc1 review patch. If anyone has any objections, please let me know. -- From: Bjorn Helgaas commit c20e128030caf0537d5e906753eac1c28fefdb75 upstream. The alpha pci_mmap_resource() is used for both IORESOURCE_MEM and IORESOURCE_IO resources, but iomem_is_exclusive() is only applicable for IORESOURCE_MEM. Call iomem_is_exclusive() only for IORESOURCE_MEM resources, and do it earlier to match the generic version of pci_mmap_resource(). Fixes: 10a0ef39fbd1 ("PCI/alpha: pci sysfs resources") Signed-off-by: Bjorn Helgaas CC: Ivan Kokshaysky Signed-off-by: Ben Hutchings --- arch/alpha/kernel/pci-sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -78,10 +78,10 @@ static int pci_mmap_resource(struct kobj if (i >= PCI_ROM_RESOURCE) return -ENODEV; - if (!__pci_mmap_fits(pdev, i, vma, sparse)) + if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start)) return -EINVAL; - if (iomem_is_exclusive(res->start)) + if (!__pci_mmap_fits(pdev, i, vma, sparse)) return -EINVAL; pcibios_resource_to_bus(pdev, , res);
[PATCH 3.16 270/305] xen/acpi: allow xen-acpi-processor driver to load on Xen 4.7
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Jan Beulichcommit 6f2d9d99213514360034c6d52d2c3919290b3504 upstream. As of Xen 4.7 PV CPUID doesn't expose either of CPUID[1].ECX[7] and CPUID[0x8007].EDX[7] anymore, causing the driver to fail to load on both Intel and AMD systems. Doing any kind of hardware capability checks in the driver as a prerequisite was wrong anyway: With the hypervisor being in charge, all such checking should be done by it. If ACPI data gets uploaded despite some missing capability, the hypervisor is free to ignore part or all of that data. Ditch the entire check_prereq() function, and do the only valid check (xen_initial_domain()) in the caller in its place. Signed-off-by: Jan Beulich Signed-off-by: David Vrabel Signed-off-by: Ben Hutchings --- drivers/xen/xen-acpi-processor.c | 35 +++ 1 file changed, 3 insertions(+), 32 deletions(-) --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -423,36 +423,7 @@ upload: return 0; } -static int __init check_prereq(void) -{ - struct cpuinfo_x86 *c = _data(0); - - if (!xen_initial_domain()) - return -ENODEV; - - if (!acpi_gbl_FADT.smi_command) - return -ENODEV; - if (c->x86_vendor == X86_VENDOR_INTEL) { - if (!cpu_has(c, X86_FEATURE_EST)) - return -ENODEV; - - return 0; - } - if (c->x86_vendor == X86_VENDOR_AMD) { - /* Copied from powernow-k8.h, can't include ../cpufreq/powernow -* as we get compile warnings for the static functions. -*/ -#define CPUID_FREQ_VOLT_CAPABILITIES0x8007 -#define USE_HW_PSTATE 0x0080 - u32 eax, ebx, ecx, edx; - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, , , , ); - if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE) - return -ENODEV; - return 0; - } - return -ENODEV; -} /* acpi_perf_data is a pointer to percpu data. 
*/ static struct acpi_processor_performance __percpu *acpi_perf_data; @@ -509,10 +480,10 @@ struct notifier_block xen_acpi_processor static int __init xen_acpi_processor_init(void) { unsigned int i; - int rc = check_prereq(); + int rc; - if (rc) - return rc; + if (!xen_initial_domain()) + return -ENODEV; nr_acpi_bits = get_max_acpi_id() + 1; acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL);
[PATCH 3.2 05/94] alpha/PCI: Call iomem_is_exclusive() for IORESOURCE_MEM, but not IORESOURCE_IO
3.2.82-rc1 review patch. If anyone has any objections, please let me know. -- From: Bjorn Helgaas commit c20e128030caf0537d5e906753eac1c28fefdb75 upstream. The alpha pci_mmap_resource() is used for both IORESOURCE_MEM and IORESOURCE_IO resources, but iomem_is_exclusive() is only applicable for IORESOURCE_MEM. Call iomem_is_exclusive() only for IORESOURCE_MEM resources, and do it earlier to match the generic version of pci_mmap_resource(). Fixes: 10a0ef39fbd1 ("PCI/alpha: pci sysfs resources") Signed-off-by: Bjorn Helgaas CC: Ivan Kokshaysky Signed-off-by: Ben Hutchings --- arch/alpha/kernel/pci-sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -78,10 +78,10 @@ static int pci_mmap_resource(struct kobj if (i >= PCI_ROM_RESOURCE) return -ENODEV; - if (!__pci_mmap_fits(pdev, i, vma, sparse)) + if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start)) return -EINVAL; - if (iomem_is_exclusive(res->start)) + if (!__pci_mmap_fits(pdev, i, vma, sparse)) return -EINVAL; pcibios_resource_to_bus(pdev, , res);
[PATCH 3.16 270/305] xen/acpi: allow xen-acpi-processor driver to load on Xen 4.7
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Jan Beulich commit 6f2d9d99213514360034c6d52d2c3919290b3504 upstream. As of Xen 4.7 PV CPUID doesn't expose either of CPUID[1].ECX[7] and CPUID[0x8007].EDX[7] anymore, causing the driver to fail to load on both Intel and AMD systems. Doing any kind of hardware capability checks in the driver as a prerequisite was wrong anyway: With the hypervisor being in charge, all such checking should be done by it. If ACPI data gets uploaded despite some missing capability, the hypervisor is free to ignore part or all of that data. Ditch the entire check_prereq() function, and do the only valid check (xen_initial_domain()) in the caller in its place. Signed-off-by: Jan Beulich Signed-off-by: David Vrabel Signed-off-by: Ben Hutchings --- drivers/xen/xen-acpi-processor.c | 35 +++ 1 file changed, 3 insertions(+), 32 deletions(-) --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -423,36 +423,7 @@ upload: return 0; } -static int __init check_prereq(void) -{ - struct cpuinfo_x86 *c = _data(0); - - if (!xen_initial_domain()) - return -ENODEV; - - if (!acpi_gbl_FADT.smi_command) - return -ENODEV; - if (c->x86_vendor == X86_VENDOR_INTEL) { - if (!cpu_has(c, X86_FEATURE_EST)) - return -ENODEV; - - return 0; - } - if (c->x86_vendor == X86_VENDOR_AMD) { - /* Copied from powernow-k8.h, can't include ../cpufreq/powernow -* as we get compile warnings for the static functions. -*/ -#define CPUID_FREQ_VOLT_CAPABILITIES0x8007 -#define USE_HW_PSTATE 0x0080 - u32 eax, ebx, ecx, edx; - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, , , , ); - if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE) - return -ENODEV; - return 0; - } - return -ENODEV; -} /* acpi_perf_data is a pointer to percpu data. 
*/ static struct acpi_processor_performance __percpu *acpi_perf_data; @@ -509,10 +480,10 @@ struct notifier_block xen_acpi_processor static int __init xen_acpi_processor_init(void) { unsigned int i; - int rc = check_prereq(); + int rc; - if (rc) - return rc; + if (!xen_initial_domain()) + return -ENODEV; nr_acpi_bits = get_max_acpi_id() + 1; acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL);
[PATCH 3.16 133/305] powerpc: Use privileged SPR number for MMCR2
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Thomas Huth commit 8dd75ccb571f3c92c48014b3dabd3d51a115ab41 upstream. We are already using the privileged versions of MMCR0, MMCR1 and MMCRA in the kernel, so for MMCR2, we should better use the privileged versions, too, to be consistent. Fixes: 240686c13687 ("powerpc: Initialise PMU related regs on Power8") Suggested-by: Paul Mackerras Signed-off-by: Thomas Huth Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Ben Hutchings --- arch/powerpc/include/asm/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -704,7 +704,7 @@ #define MMCR0_FCWAIT 0x0002UL /* freeze counter in WAIT state */ #define MMCR0_FCHV 0x0001UL /* freeze conditions in hypervisor mode */ #define SPRN_MMCR1 798 -#define SPRN_MMCR2 769 +#define SPRN_MMCR2 785 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x8000UL /* SDAR synced with SIAR */ #define MMCRA_SDAR_DCACHE_MISS 0x4000UL
[PATCH 3.16 133/305] powerpc: Use privileged SPR number for MMCR2
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: Thomas Huth commit 8dd75ccb571f3c92c48014b3dabd3d51a115ab41 upstream. We are already using the privileged versions of MMCR0, MMCR1 and MMCRA in the kernel, so for MMCR2, we should better use the privileged versions, too, to be consistent. Fixes: 240686c13687 ("powerpc: Initialise PMU related regs on Power8") Suggested-by: Paul Mackerras Signed-off-by: Thomas Huth Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Ben Hutchings --- arch/powerpc/include/asm/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -704,7 +704,7 @@ #define MMCR0_FCWAIT 0x0002UL /* freeze counter in WAIT state */ #define MMCR0_FCHV 0x0001UL /* freeze conditions in hypervisor mode */ #define SPRN_MMCR1 798 -#define SPRN_MMCR2 769 +#define SPRN_MMCR2 785 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x8000UL /* SDAR synced with SIAR */ #define MMCRA_SDAR_DCACHE_MISS 0x4000UL
[PATCH 3.2 28/94] fs/cifs: correctly to anonymous authentication for the NTLM(v2) authentication
3.2.82-rc1 review patch. If anyone has any objections, please let me know. -- From: Stefan Metzmacher commit 1a967d6c9b39c226be1b45f13acd4d8a5ab3dc44 upstream. Only server which map unknown users to guest will allow access using a non-null NTLMv2_Response. For Samba it's the "map to guest = bad user" option. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11913 Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French [bwh: Backported to 3.2: - Adjust context, indentation - Keep using cERROR()] Signed-off-by: Ben Hutchings --- --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -727,21 +727,26 @@ ssetup_ntlmssp_authenticate: /* LM2 password would be here if we supported it */ pSMB->req_no_secext.CaseInsensitivePasswordLength = 0; - /* calculate nlmv2 response and session key */ - rc = setup_ntlmv2_rsp(ses, nls_cp); - if (rc) { - cERROR(1, "Error %d during NTLMv2 authentication", rc); - goto ssetup_exit; - } - memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, - ses->auth_key.len - CIFS_SESS_KEY_SIZE); - bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE; + if (ses->user_name != NULL) { + /* calculate nlmv2 response and session key */ + rc = setup_ntlmv2_rsp(ses, nls_cp); + if (rc) { + cERROR(1, "Error %d during NTLMv2 authentication", rc); + goto ssetup_exit; + } + + memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, + ses->auth_key.len - CIFS_SESS_KEY_SIZE); + bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE; - /* set case sensitive password length after tilen may get -* assigned, tilen is 0 otherwise. -*/ - pSMB->req_no_secext.CaseSensitivePasswordLength = - cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE); + /* set case sensitive password length after tilen may get +* assigned, tilen is 0 otherwise. +*/ + pSMB->req_no_secext.CaseSensitivePasswordLength = + cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE); + } else { + pSMB->req_no_secext.CaseSensitivePasswordLength = 0; + } if (ses->capabilities & CAP_UNICODE) { if (iov[0].iov_len % 2) {
[PATCH 3.16 259/305] x86/power/64: Fix kernel text mapping corruption during image restoration
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: "Rafael J. Wysocki"commit 65c0554b73c920023cc8998802e508b798113b46 upstream. Logan Gunthorpe reports that hibernation stopped working reliably for him after commit ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata). That turns out to be a consequence of a long-standing issue with the 64-bit image restoration code on x86, which is that the temporary page tables set up by it to avoid page tables corruption when the last bits of the image kernel's memory contents are copied into their original page frames re-use the boot kernel's text mapping, but that mapping may very well get corrupted just like any other part of the page tables. Of course, if that happens, the final jump to the image kernel's entry point will go to nowhere. The exact reason why commit ab76f7b4ab23 matters here is that it sometimes causes a PMD of a large page to be split into PTEs that are allocated dynamically and get corrupted during image restoration as described above. To fix that issue note that the code copying the last bits of the image kernel's memory contents to the page frames occupied by them previoulsy doesn't use the kernel text mapping, because it runs from a special page covered by the identity mapping set up for that code from scratch. Hence, the kernel text mapping is only needed before that code starts to run and then it will only be used just for the final jump to the image kernel's entry point. Accordingly, the temporary page tables set up in swsusp_arch_resume() on x86-64 need to contain the kernel text mapping too. That mapping is only going to be used for the final jump to the image kernel, so it only needs to cover the image kernel's entry point, because the first thing the image kernel does after getting control back is to switch over to its own original page tables. 
Moreover, the virtual address of the image kernel's entry point in that mapping has to be the same as the one mapped by the image kernel's page tables. With that in mind, modify the x86-64's arch_hibernation_header_save() and arch_hibernation_header_restore() routines to pass the physical address of the image kernel's entry point (in addition to its virtual address) to the boot kernel (a small piece of assembly code involved in passing the entry point's virtual address to the image kernel is not necessary any more after that, so drop it). Update RESTORE_MAGIC too to reflect the image header format change. Next, in set_up_temporary_mappings(), use the physical and virtual addresses of the image kernel's entry point passed in the image header to set up a minimum kernel text mapping (using memory pages that won't be overwritten by the image kernel's memory contents) that will map those addresses to each other as appropriate. This makes the concern about the possible corruption of the original boot kernel text mapping go away and if the the minimum kernel text mapping used for the final jump marks the image kernel's entry point memory as executable, the jump to it is guaraneed to succeed. Fixes: ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata) Link: http://marc.info/?l=linux-pm=146372852823760=2 Reported-by: Logan Gunthorpe Reported-and-tested-by: Borislav Petkov Tested-by: Kees Cook Signed-off-by: Rafael J. Wysocki [bwh: Backported to 3.16: adjust context] Signed-off-by: Ben Hutchings --- arch/x86/power/hibernate_64.c | 97 ++- arch/x86/power/hibernate_asm_64.S | 55 ++ 2 files changed, 109 insertions(+), 43 deletions(-) --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -19,6 +19,7 @@ #include #include #include +#include /* Defined in hibernate_asm_64.S */ extern asmlinkage __visible int restore_image(void); @@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_ * kernel's text (this value is passed in the image header). 
*/ unsigned long restore_jump_address __visible; +unsigned long jump_address_phys; /* * Value of the cr3 register from before the hibernation (this value is passed @@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible; pgd_t *temp_level4_pgt __visible; -void *relocated_restore_code __visible; +unsigned long relocated_restore_code __visible; + +static int set_up_temporary_text_mapping(void) +{ + pmd_t *pmd; + pud_t *pud; + + /* +* The new mapping only has to cover the page containing the image +* kernel's entry point (jump_address_phys), because the switch over to +* it is carried out by relocated code running from a page allocated +* specifically for this purpose and covered by the identity mapping, so +* the temporary kernel text mapping is only needed for the final jump. +
[PATCH 3.2 28/94] fs/cifs: correctly to anonymous authentication for the NTLM(v2) authentication
3.2.82-rc1 review patch. If anyone has any objections, please let me know. -- From: Stefan Metzmacher commit 1a967d6c9b39c226be1b45f13acd4d8a5ab3dc44 upstream. Only server which map unknown users to guest will allow access using a non-null NTLMv2_Response. For Samba it's the "map to guest = bad user" option. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11913 Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French [bwh: Backported to 3.2: - Adjust context, indentation - Keep using cERROR()] Signed-off-by: Ben Hutchings --- --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -727,21 +727,26 @@ ssetup_ntlmssp_authenticate: /* LM2 password would be here if we supported it */ pSMB->req_no_secext.CaseInsensitivePasswordLength = 0; - /* calculate nlmv2 response and session key */ - rc = setup_ntlmv2_rsp(ses, nls_cp); - if (rc) { - cERROR(1, "Error %d during NTLMv2 authentication", rc); - goto ssetup_exit; - } - memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, - ses->auth_key.len - CIFS_SESS_KEY_SIZE); - bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE; + if (ses->user_name != NULL) { + /* calculate nlmv2 response and session key */ + rc = setup_ntlmv2_rsp(ses, nls_cp); + if (rc) { + cERROR(1, "Error %d during NTLMv2 authentication", rc); + goto ssetup_exit; + } + + memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, + ses->auth_key.len - CIFS_SESS_KEY_SIZE); + bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE; - /* set case sensitive password length after tilen may get -* assigned, tilen is 0 otherwise. -*/ - pSMB->req_no_secext.CaseSensitivePasswordLength = - cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE); + /* set case sensitive password length after tilen may get +* assigned, tilen is 0 otherwise. +*/ + pSMB->req_no_secext.CaseSensitivePasswordLength = + cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE); + } else { + pSMB->req_no_secext.CaseSensitivePasswordLength = 0; + } if (ses->capabilities & CAP_UNICODE) { if (iov[0].iov_len % 2) {
[PATCH 3.16 259/305] x86/power/64: Fix kernel text mapping corruption during image restoration
3.16.37-rc1 review patch. If anyone has any objections, please let me know. -- From: "Rafael J. Wysocki" commit 65c0554b73c920023cc8998802e508b798113b46 upstream. Logan Gunthorpe reports that hibernation stopped working reliably for him after commit ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata). That turns out to be a consequence of a long-standing issue with the 64-bit image restoration code on x86, which is that the temporary page tables set up by it to avoid page tables corruption when the last bits of the image kernel's memory contents are copied into their original page frames re-use the boot kernel's text mapping, but that mapping may very well get corrupted just like any other part of the page tables. Of course, if that happens, the final jump to the image kernel's entry point will go to nowhere. The exact reason why commit ab76f7b4ab23 matters here is that it sometimes causes a PMD of a large page to be split into PTEs that are allocated dynamically and get corrupted during image restoration as described above. To fix that issue note that the code copying the last bits of the image kernel's memory contents to the page frames occupied by them previoulsy doesn't use the kernel text mapping, because it runs from a special page covered by the identity mapping set up for that code from scratch. Hence, the kernel text mapping is only needed before that code starts to run and then it will only be used just for the final jump to the image kernel's entry point. Accordingly, the temporary page tables set up in swsusp_arch_resume() on x86-64 need to contain the kernel text mapping too. That mapping is only going to be used for the final jump to the image kernel, so it only needs to cover the image kernel's entry point, because the first thing the image kernel does after getting control back is to switch over to its own original page tables. 
Moreover, the virtual address of the image kernel's entry point in that mapping has to be the same as the one mapped by the image kernel's page tables. With that in mind, modify the x86-64's arch_hibernation_header_save() and arch_hibernation_header_restore() routines to pass the physical address of the image kernel's entry point (in addition to its virtual address) to the boot kernel (a small piece of assembly code involved in passing the entry point's virtual address to the image kernel is not necessary any more after that, so drop it). Update RESTORE_MAGIC too to reflect the image header format change. Next, in set_up_temporary_mappings(), use the physical and virtual addresses of the image kernel's entry point passed in the image header to set up a minimum kernel text mapping (using memory pages that won't be overwritten by the image kernel's memory contents) that will map those addresses to each other as appropriate. This makes the concern about the possible corruption of the original boot kernel text mapping go away and if the the minimum kernel text mapping used for the final jump marks the image kernel's entry point memory as executable, the jump to it is guaraneed to succeed. Fixes: ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata) Link: http://marc.info/?l=linux-pm=146372852823760=2 Reported-by: Logan Gunthorpe Reported-and-tested-by: Borislav Petkov Tested-by: Kees Cook Signed-off-by: Rafael J. Wysocki [bwh: Backported to 3.16: adjust context] Signed-off-by: Ben Hutchings --- arch/x86/power/hibernate_64.c | 97 ++- arch/x86/power/hibernate_asm_64.S | 55 ++ 2 files changed, 109 insertions(+), 43 deletions(-) --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -19,6 +19,7 @@ #include #include #include +#include /* Defined in hibernate_asm_64.S */ extern asmlinkage __visible int restore_image(void); @@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_ * kernel's text (this value is passed in the image header). 
*/ unsigned long restore_jump_address __visible; +unsigned long jump_address_phys; /* * Value of the cr3 register from before the hibernation (this value is passed @@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible; pgd_t *temp_level4_pgt __visible; -void *relocated_restore_code __visible; +unsigned long relocated_restore_code __visible; + +static int set_up_temporary_text_mapping(void) +{ + pmd_t *pmd; + pud_t *pud; + + /* +* The new mapping only has to cover the page containing the image +* kernel's entry point (jump_address_phys), because the switch over to +* it is carried out by relocated code running from a page allocated +* specifically for this purpose and covered by the identity mapping, so +* the temporary kernel text mapping is only needed for the final jump. +* Moreover, in that mapping the virtual address of the image kernel's +* entry point must be the same as its virtual
[PATCH] pinctrl: meson: get rid of unneeded domain structures
The driver originally supported more domains (register ranges) per pinctrl device, but since commit 9dab1868ec0d ("pinctrl: amlogic: Make driver independent from two-domain configuration") each device gets assigned a single domain and we instantiate multiple pinctrl devices in the DT. Therefore, now the 'meson_domain' and 'meson_domain_data' structures don't have any reason to exist and can be removed to make the model simpler to understand. This commit doesn't change behavior. Tested on a Odroid-C2. Signed-off-by: Beniamino Galvani--- drivers/pinctrl/meson/pinctrl-meson-gxbb.c | 24 +--- drivers/pinctrl/meson/pinctrl-meson.c | 222 +++-- drivers/pinctrl/meson/pinctrl-meson.h | 52 ++- drivers/pinctrl/meson/pinctrl-meson8.c | 24 +--- drivers/pinctrl/meson/pinctrl-meson8b.c| 24 +--- 5 files changed, 118 insertions(+), 228 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c index cb4d6ad..233c9c8 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c @@ -556,38 +556,28 @@ static struct meson_bank meson_gxbb_aobus_banks[] = { BANK("AO", PIN(GPIOAO_0, 0), PIN(GPIOAO_13, 0), 0, 0, 0, 16, 0, 0, 0, 16, 1, 0), }; -static struct meson_domain_data meson_gxbb_periphs_domain_data = { +struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = { .name = "periphs-banks", - .banks = meson_gxbb_periphs_banks, - .num_banks = ARRAY_SIZE(meson_gxbb_periphs_banks), .pin_base = 14, - .num_pins = 120, -}; - -static struct meson_domain_data meson_gxbb_aobus_domain_data = { - .name = "aobus-banks", - .banks = meson_gxbb_aobus_banks, - .num_banks = ARRAY_SIZE(meson_gxbb_aobus_banks), - .pin_base = 0, - .num_pins = 14, -}; - -struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = { .pins = meson_gxbb_periphs_pins, .groups = meson_gxbb_periphs_groups, .funcs = meson_gxbb_periphs_functions, - .domain_data= _gxbb_periphs_domain_data, + .banks = meson_gxbb_periphs_banks, .num_pins 
= ARRAY_SIZE(meson_gxbb_periphs_pins), .num_groups = ARRAY_SIZE(meson_gxbb_periphs_groups), .num_funcs = ARRAY_SIZE(meson_gxbb_periphs_functions), + .num_banks = ARRAY_SIZE(meson_gxbb_periphs_banks), }; struct meson_pinctrl_data meson_gxbb_aobus_pinctrl_data = { + .name = "aobus-banks", + .pin_base = 0, .pins = meson_gxbb_aobus_pins, .groups = meson_gxbb_aobus_groups, .funcs = meson_gxbb_aobus_functions, - .domain_data= _gxbb_aobus_domain_data, + .banks = meson_gxbb_aobus_banks, .num_pins = ARRAY_SIZE(meson_gxbb_aobus_pins), .num_groups = ARRAY_SIZE(meson_gxbb_aobus_groups), .num_funcs = ARRAY_SIZE(meson_gxbb_aobus_functions), + .num_banks = ARRAY_SIZE(meson_gxbb_aobus_banks), }; diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 11623c6..9678599 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -21,9 +21,8 @@ * domain which can't be powered off; the bank also uses a set of * registers different from the other banks. * - * For each of the two power domains (regular and always-on) there are - * 4 different register ranges that control the following properties - * of the pins: + * For each pin controller there are 4 different register ranges that + * control the following properties of the pins: * 1) pin muxing * 2) pull enable/disable * 3) pull up/down @@ -33,8 +32,8 @@ * direction are the same and thus there are only 3 register ranges. * * Every pinmux group can be enabled by a specific bit in the first - * register range of the domain; when all groups for a given pin are - * disabled the pin acts as a GPIO. + * register range; when all groups for a given pin are disabled the + * pin acts as a GPIO. 
* * For the pull and GPIO configuration every bank uses a contiguous * set of bits in the register sets described above; the same register @@ -66,21 +65,21 @@ /** * meson_get_bank() - find the bank containing a given pin * - * @domain:the domain containing the pin + * @pc:the pinctrl instance * @pin: the pin number * @bank: the found bank * * Return: 0 on success, a negative value on error */ -static int meson_get_bank(struct meson_domain *domain, unsigned int pin, +static int meson_get_bank(struct meson_pinctrl *pc, unsigned int pin, struct meson_bank **bank) { int i; - for (i = 0; i < domain->data->num_banks; i++) { -
[PATCH] pinctrl: meson: get rid of unneeded domain structures
The driver originally supported more domains (register ranges) per pinctrl device, but since commit 9dab1868ec0d ("pinctrl: amlogic: Make driver independent from two-domain configuration") each device gets assigned a single domain and we instantiate multiple pinctrl devices in the DT. Therefore, now the 'meson_domain' and 'meson_domain_data' structures don't have any reason to exist and can be removed to make the model simpler to understand. This commit doesn't change behavior. Tested on a Odroid-C2. Signed-off-by: Beniamino Galvani --- drivers/pinctrl/meson/pinctrl-meson-gxbb.c | 24 +--- drivers/pinctrl/meson/pinctrl-meson.c | 222 +++-- drivers/pinctrl/meson/pinctrl-meson.h | 52 ++- drivers/pinctrl/meson/pinctrl-meson8.c | 24 +--- drivers/pinctrl/meson/pinctrl-meson8b.c| 24 +--- 5 files changed, 118 insertions(+), 228 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c index cb4d6ad..233c9c8 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c @@ -556,38 +556,28 @@ static struct meson_bank meson_gxbb_aobus_banks[] = { BANK("AO", PIN(GPIOAO_0, 0), PIN(GPIOAO_13, 0), 0, 0, 0, 16, 0, 0, 0, 16, 1, 0), }; -static struct meson_domain_data meson_gxbb_periphs_domain_data = { +struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = { .name = "periphs-banks", - .banks = meson_gxbb_periphs_banks, - .num_banks = ARRAY_SIZE(meson_gxbb_periphs_banks), .pin_base = 14, - .num_pins = 120, -}; - -static struct meson_domain_data meson_gxbb_aobus_domain_data = { - .name = "aobus-banks", - .banks = meson_gxbb_aobus_banks, - .num_banks = ARRAY_SIZE(meson_gxbb_aobus_banks), - .pin_base = 0, - .num_pins = 14, -}; - -struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = { .pins = meson_gxbb_periphs_pins, .groups = meson_gxbb_periphs_groups, .funcs = meson_gxbb_periphs_functions, - .domain_data= _gxbb_periphs_domain_data, + .banks = meson_gxbb_periphs_banks, .num_pins 
= ARRAY_SIZE(meson_gxbb_periphs_pins), .num_groups = ARRAY_SIZE(meson_gxbb_periphs_groups), .num_funcs = ARRAY_SIZE(meson_gxbb_periphs_functions), + .num_banks = ARRAY_SIZE(meson_gxbb_periphs_banks), }; struct meson_pinctrl_data meson_gxbb_aobus_pinctrl_data = { + .name = "aobus-banks", + .pin_base = 0, .pins = meson_gxbb_aobus_pins, .groups = meson_gxbb_aobus_groups, .funcs = meson_gxbb_aobus_functions, - .domain_data= _gxbb_aobus_domain_data, + .banks = meson_gxbb_aobus_banks, .num_pins = ARRAY_SIZE(meson_gxbb_aobus_pins), .num_groups = ARRAY_SIZE(meson_gxbb_aobus_groups), .num_funcs = ARRAY_SIZE(meson_gxbb_aobus_functions), + .num_banks = ARRAY_SIZE(meson_gxbb_aobus_banks), }; diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 11623c6..9678599 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -21,9 +21,8 @@ * domain which can't be powered off; the bank also uses a set of * registers different from the other banks. * - * For each of the two power domains (regular and always-on) there are - * 4 different register ranges that control the following properties - * of the pins: + * For each pin controller there are 4 different register ranges that + * control the following properties of the pins: * 1) pin muxing * 2) pull enable/disable * 3) pull up/down @@ -33,8 +32,8 @@ * direction are the same and thus there are only 3 register ranges. * * Every pinmux group can be enabled by a specific bit in the first - * register range of the domain; when all groups for a given pin are - * disabled the pin acts as a GPIO. + * register range; when all groups for a given pin are disabled the + * pin acts as a GPIO. 
* * For the pull and GPIO configuration every bank uses a contiguous * set of bits in the register sets described above; the same register @@ -66,21 +65,21 @@ /** * meson_get_bank() - find the bank containing a given pin * - * @domain:the domain containing the pin + * @pc:the pinctrl instance * @pin: the pin number * @bank: the found bank * * Return: 0 on success, a negative value on error */ -static int meson_get_bank(struct meson_domain *domain, unsigned int pin, +static int meson_get_bank(struct meson_pinctrl *pc, unsigned int pin, struct meson_bank **bank) { int i; - for (i = 0; i < domain->data->num_banks; i++) { - if (pin
Re: [dm-devel] [RFC PATCH 2/2] mm, mempool: do not throttle PF_LESS_THROTTLE tasks
On Fri, 12 Aug 2016, Michal Hocko wrote: > On Thu 04-08-16 14:49:41, Mikulas Patocka wrote: > > > On Wed, 3 Aug 2016, Michal Hocko wrote: > > > > > But the device congestion is not the only condition required for the > > > throttling. The pgdat also has to be marked congested which means that the > > > LRU page scanner bumped into dirty/writeback/pg_reclaim pages at the > > > tail of the LRU. That should only happen if we are rotating LRUs too > > > quickly. AFAIU the reclaim shouldn't allow free ticket scanning in that > > > situation. > > > > The obvious problem here is that mempool allocations should sleep in > > mempool_alloc() on &pool->wait (until someone returns some entries into > > the mempool), they should not sleep inside the page allocator. > > I agree that mempool_alloc should _primarily_ sleep on their own > throttling mechanism. I am not questioning that. I am just saying that > the page allocator has its own throttling which it relies on and that > cannot be just ignored because that might have other undesirable side > effects. So if the right approach is really to never throttle certain > requests then we have to bail out from a congested nodes/zones as soon > as the congestion is detected. > > Now, I would like to see that something like that is _really_ necessary. Currently, it is not a problem - device mapper reports the device as congested only if the underlying physical disks are congested. But once we change it so that device mapper reports congested state on its own (when it has too many bios in progress), this starts being a problem. I would add PF_NO_THROTTLE or __GFP_NO_THROTTLE to mempool_alloc. Or - we can prevent the memory reclaim from throttling if we see both __GFP_NOMEMALLOC and __GFP_NORETRY - that would be sufficient to detect mempool_alloc usage and it wouldn't hurt other __GFP_NORETRY users. 
Mikulas > I believe that we should simply start with easier part and get rid of > throttle_vm_writeout because that seems like a left over from the past. > If that turns out unsatisfactory and we have clear picture when the > throttling is harmful/suboptimal then we can move on with a more complex > solution. Does this sound like a way forward? > > -- > Michal Hocko > SUSE Labs
Re: [dm-devel] [RFC PATCH 2/2] mm, mempool: do not throttle PF_LESS_THROTTLE tasks
On Fri, 12 Aug 2016, Michal Hocko wrote: > On Thu 04-08-16 14:49:41, Mikulas Patocka wrote: > > > On Wed, 3 Aug 2016, Michal Hocko wrote: > > > > > But the device congestion is not the only condition required for the > > > throttling. The pgdat also has to be marked congested which means that the > > > LRU page scanner bumped into dirty/writeback/pg_reclaim pages at the > > > tail of the LRU. That should only happen if we are rotating LRUs too > > > quickly. AFAIU the reclaim shouldn't allow free ticket scanning in that > > > situation. > > > > The obvious problem here is that mempool allocations should sleep in > > mempool_alloc() on &pool->wait (until someone returns some entries into > > the mempool), they should not sleep inside the page allocator. > > I agree that mempool_alloc should _primarily_ sleep on their own > throttling mechanism. I am not questioning that. I am just saying that > the page allocator has its own throttling which it relies on and that > cannot be just ignored because that might have other undesirable side > effects. So if the right approach is really to never throttle certain > requests then we have to bail out from a congested nodes/zones as soon > as the congestion is detected. > > Now, I would like to see that something like that is _really_ necessary. Currently, it is not a problem - device mapper reports the device as congested only if the underlying physical disks are congested. But once we change it so that device mapper reports congested state on its own (when it has too many bios in progress), this starts being a problem. I would add PF_NO_THROTTLE or __GFP_NO_THROTTLE to mempool_alloc. Or - we can prevent the memory reclaim from throttling if we see both __GFP_NOMEMALLOC and __GFP_NORETRY - that would be sufficient to detect mempool_alloc usage and it wouldn't hurt other __GFP_NORETRY users. 
Mikulas > I believe that we should simply start with easier part and get rid of > throttle_vm_writeout because that seems like a left over from the past. > If that turns out unsatisfactory and we have clear picture when the > throttling is harmful/suboptimal then we can move on with a more complex > solution. Does this sound like a way forward? > > -- > Michal Hocko > SUSE Labs
Re: staging: ks7010: Replace three printk() calls by pr_err()
On Sat, 2016-08-13 at 13:10 +0200, SF Markus Elfring wrote: > > > Prefer usage of the macro "pr_err" over the interface "printk". > > Not correct > A checkpatch warning like "PREFER_PR_LEVEL" can point additional > possibilities out > for this use case. > Would you like to introduce any of the higher level logging functions instead? I think pr_<level> is OK if reworking the code to support dev_<level> is not easy. > > > diff --git a/drivers/staging/ks7010/ks7010_sdio.c > > > b/drivers/staging/ks7010/ks7010_sdio.c > > [] > > > > > > @@ -998,11 +998,11 @@ static int ks7010_sdio_probe(struct sdio_func *func, > > > /* private memory allocate */ > > > netdev = alloc_etherdev(sizeof(*priv)); > > > if (netdev == NULL) { > > > - printk(KERN_ERR "ks7010 : Unable to alloc new net device\n"); > > > + pr_err(pr_fmt("Unable to alloc new net device\n")); > > All of these pr_fmt uses are redundant as pr_err already does pr_fmt > Thanks for your reminder. > > Would you accept that another update will be appended to the discussed patch > series? No. Patches should not knowingly introduce defects that are corrected in follow-on patches. > > alloc_etherdev already does a dump_stack so the OOM isn't useful. > Does this information indicate that this printk() (or pr_err()) call > should be deleted? Markus, I don't know if it's your lack of English comprehension or not, but it's fairly obvious from my reply that this line should be deleted, either in this patch or a follow-on.
Re: staging: ks7010: Replace three printk() calls by pr_err()
On Sat, 2016-08-13 at 13:10 +0200, SF Markus Elfring wrote: > > > Prefer usage of the macro "pr_err" over the interface "printk". > > Not correct > A checkpatch warning like "PREFER_PR_LEVEL" can point additional > possibilities out > for this use case. > Would you like to introduce any of the higher level logging functions instead? I think pr_<level> is OK if reworking the code to support dev_<level> is not easy. > > > diff --git a/drivers/staging/ks7010/ks7010_sdio.c > > > b/drivers/staging/ks7010/ks7010_sdio.c > > [] > > > > > > @@ -998,11 +998,11 @@ static int ks7010_sdio_probe(struct sdio_func *func, > > > /* private memory allocate */ > > > netdev = alloc_etherdev(sizeof(*priv)); > > > if (netdev == NULL) { > > > - printk(KERN_ERR "ks7010 : Unable to alloc new net device\n"); > > > + pr_err(pr_fmt("Unable to alloc new net device\n")); > > All of these pr_fmt uses are redundant as pr_err already does pr_fmt > Thanks for your reminder. > > Would you accept that another update will be appended to the discussed patch > series? No. Patches should not knowingly introduce defects that are corrected in follow-on patches. > > alloc_etherdev already does a dump_stack so the OOM isn't useful. > Does this information indicate that this printk() (or pr_err()) call > should be deleted? Markus, I don't know if it's your lack of English comprehension or not, but it's fairly obvious from my reply that this line should be deleted, either in this patch or a follow-on.
Re: [PATCH v3 0/7] x86: Rewrite switch_to()
On Sat, Aug 13, 2016 at 9:38 AM, Brian Gerst wrote: > This patch set simplifies the switch_to() code, by moving the stack switch > code out of line into an asm stub before calling __switch_to(). This ends > up being more readable, and using the C calling convention instead of > clobbering all registers improves code generation. It also allows newly > forked processes to construct a special stack frame to seamlessly flow > to ret_from_fork, instead of using a test and branch, or an unbalanced > call/ret. Do you have performance numbers? Is it noticeable/measurable? Linus
Re: [PATCH v3 0/7] x86: Rewrite switch_to()
On Sat, Aug 13, 2016 at 9:38 AM, Brian Gerst wrote: > This patch set simplifies the switch_to() code, by moving the stack switch > code out of line into an asm stub before calling __switch_to(). This ends > up being more readable, and using the C calling convention instead of > clobbering all registers improves code generation. It also allows newly > forked processes to construct a special stack frame to seamlessly flow > to ret_from_fork, instead of using a test and branch, or an unbalanced > call/ret. Do you have performance numbers? Is it noticeable/measurable? Linus
[PATCH 1/3] befs: fix typos in datastream.c
Signed-off-by: Luis de Bethencourt --- Hi, This is a series of patches fixing small issues in datastream.c. On the process of doing the same for the rest of files. To finish cleanup and start adding documentation and new features. Thanks, Luis fs/befs/datastream.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index 6889644..b2eb5b5 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -37,7 +37,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb, /** * befs_read_datastream - get buffer_head containing data, starting from pos. * @sb: Filesystem superblock - * @ds: datastrem to find data with + * @ds: datastream to find data with * @pos: start of data * @off: offset of data in buffer_head->b_data * @@ -115,7 +115,7 @@ befs_fblock2brun(struct super_block *sb, const befs_data_stream *data, /** * befs_read_lsmylink - read long symlink from datastream. * @sb: Filesystem superblock - * @ds: Datastrem to read from + * @ds: Datastream to read from * @buff: Buffer in which to place long symlink data * @len: Length of the long symlink in bytes * @@ -183,7 +183,7 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds) metablocks += ds->indirect.len; /* - Double indir block, plus all the indirect blocks it mapps + Double indir block, plus all the indirect blocks it maps. In the double-indirect range, all block runs of data are BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know how many data block runs are in the double-indirect region, @@ -397,7 +397,7 @@ befs_find_brun_indirect(struct super_block *sb, though the double-indirect run may be several blocks long, we can calculate which of those blocks will contain the index we are after and only read that one. We then follow it to - the indirect block and perform a similar process to find + the indirect block and perform a similar process to find the actual block run that maps the data block we are interested in. -- 2.5.1
[PATCH 1/3] befs: fix typos in datastream.c
Signed-off-by: Luis de Bethencourt --- Hi, This is a series of patches fixing small issues in datastream.c. On the process of doing the same for the rest of files. To finish cleanup and start adding documentation and new features. Thanks, Luis fs/befs/datastream.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index 6889644..b2eb5b5 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -37,7 +37,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb, /** * befs_read_datastream - get buffer_head containing data, starting from pos. * @sb: Filesystem superblock - * @ds: datastrem to find data with + * @ds: datastream to find data with * @pos: start of data * @off: offset of data in buffer_head->b_data * @@ -115,7 +115,7 @@ befs_fblock2brun(struct super_block *sb, const befs_data_stream *data, /** * befs_read_lsmylink - read long symlink from datastream. * @sb: Filesystem superblock - * @ds: Datastrem to read from + * @ds: Datastream to read from * @buff: Buffer in which to place long symlink data * @len: Length of the long symlink in bytes * @@ -183,7 +183,7 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds) metablocks += ds->indirect.len; /* - Double indir block, plus all the indirect blocks it mapps + Double indir block, plus all the indirect blocks it maps. In the double-indirect range, all block runs of data are BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know how many data block runs are in the double-indirect region, @@ -397,7 +397,7 @@ befs_find_brun_indirect(struct super_block *sb, though the double-indirect run may be several blocks long, we can calculate which of those blocks will contain the index we are after and only read that one. We then follow it to - the indirect block and perform a similar process to find + the indirect block and perform a similar process to find the actual block run that maps the data block we are interested in. -- 2.5.1
[PATCH 2/3] befs: improve documentation in datastream.c
Convert function descriptions to kernel-doc style. Signed-off-by: Luis de Bethencourt--- fs/befs/datastream.c | 193 ++- 1 file changed, 98 insertions(+), 95 deletions(-) diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index b2eb5b5..5ce85cf 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -75,7 +75,13 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds, return bh; } -/* +/** + * befs_fblock2brun - give back block run for fblock + * @sb: the superblock + * @data: datastream to read from + * @fblock: the blocknumber with the file position to find + * @run: The found run is passed back through this pointer + * * Takes a file position and gives back a brun who's starting block * is block number fblock of the file. * @@ -212,36 +218,35 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds) return blocks; } -/* - Finds the block run that starts at file block number blockno - in the file represented by the datastream data, if that - blockno is in the direct region of the datastream. - - sb: the superblock - data: the datastream - blockno: the blocknumber to find - run: The found run is passed back through this pointer - - Return value is BEFS_OK if the blockrun is found, BEFS_ERR - otherwise. - - Algorithm: - Linear search. Checks each element of array[] to see if it - contains the blockno-th filesystem block. This is necessary - because the block runs map variable amounts of data. Simply - keeps a count of the number of blocks searched so far (sum), - incrementing this by the length of each block run as we come - across it. Adds sum to *count before returning (this is so - you can search multiple arrays that are logicaly one array, - as in the indirect region code). - - When/if blockno is found, if blockno is inside of a block - run as stored on disk, we offset the start and length members - of the block run, so that blockno is the start and len is - still valid (the run ends in the same place). 
- - 2001-11-15 Will Dyson -*/ +/** + * befs_find_brun_direct - find a direct block run in the datastream + * @sb: the superblock + * @data: the datastream + * @blockno: the blocknumber to find + * @run: The found run is passed back through this pointer + * + * Finds the block run that starts at file block number blockno + * in the file represented by the datastream data, if that + * blockno is in the direct region of the datastream. + * + * Return value is BEFS_OK if the blockrun is found, BEFS_ERR + * otherwise. + * + * Algorithm: + * Linear search. Checks each element of array[] to see if it + * contains the blockno-th filesystem block. This is necessary + * because the block runs map variable amounts of data. Simply + * keeps a count of the number of blocks searched so far (sum), + * incrementing this by the length of each block run as we come + * across it. Adds sum to *count before returning (this is so + * you can search multiple arrays that are logicaly one array, + * as in the indirect region code). + * + * When/if blockno is found, if blockno is inside of a block + * run as stored on disk, we offset the start and length members + * of the block run, so that blockno is the start and len is + * still valid (the run ends in the same place). + */ static int befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data, befs_blocknr_t blockno, befs_block_run * run) @@ -273,29 +278,28 @@ befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data, return BEFS_ERR; } -/* - Finds the block run that starts at file block number blockno - in the file represented by the datastream data, if that - blockno is in the indirect region of the datastream. - - sb: the superblock - data: the datastream - blockno: the blocknumber to find - run: The found run is passed back through this pointer - - Return value is BEFS_OK if the blockrun is found, BEFS_ERR - otherwise. 
- - Algorithm: - For each block in the indirect run of the datastream, read - it in and search through it for search_blk. - - XXX: - Really should check to make sure blockno is inside indirect - region. - - 2001-11-15 Will Dyson -*/ +/** + * befs_find_brun_indirect - find a block run in the datastream + * @sb: the superblock + * @data: the datastream + * @blockno: the blocknumber to find + * @run: The found run is passed back through this pointer + * + * Finds the block run that starts at file block number blockno + * in the file represented by the datastream data, if that + * blockno is in the indirect region of the datastream. + * +
[PATCH 2/3] befs: improve documentation in datastream.c
Convert function descriptions to kernel-doc style. Signed-off-by: Luis de Bethencourt --- fs/befs/datastream.c | 193 ++- 1 file changed, 98 insertions(+), 95 deletions(-) diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index b2eb5b5..5ce85cf 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -75,7 +75,13 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds, return bh; } -/* +/** + * befs_fblock2brun - give back block run for fblock + * @sb: the superblock + * @data: datastream to read from + * @fblock: the blocknumber with the file position to find + * @run: The found run is passed back through this pointer + * * Takes a file position and gives back a brun who's starting block * is block number fblock of the file. * @@ -212,36 +218,35 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds) return blocks; } -/* - Finds the block run that starts at file block number blockno - in the file represented by the datastream data, if that - blockno is in the direct region of the datastream. - - sb: the superblock - data: the datastream - blockno: the blocknumber to find - run: The found run is passed back through this pointer - - Return value is BEFS_OK if the blockrun is found, BEFS_ERR - otherwise. - - Algorithm: - Linear search. Checks each element of array[] to see if it - contains the blockno-th filesystem block. This is necessary - because the block runs map variable amounts of data. Simply - keeps a count of the number of blocks searched so far (sum), - incrementing this by the length of each block run as we come - across it. Adds sum to *count before returning (this is so - you can search multiple arrays that are logicaly one array, - as in the indirect region code). - - When/if blockno is found, if blockno is inside of a block - run as stored on disk, we offset the start and length members - of the block run, so that blockno is the start and len is - still valid (the run ends in the same place). 
- - 2001-11-15 Will Dyson -*/ +/** + * befs_find_brun_direct - find a direct block run in the datastream + * @sb: the superblock + * @data: the datastream + * @blockno: the blocknumber to find + * @run: The found run is passed back through this pointer + * + * Finds the block run that starts at file block number blockno + * in the file represented by the datastream data, if that + * blockno is in the direct region of the datastream. + * + * Return value is BEFS_OK if the blockrun is found, BEFS_ERR + * otherwise. + * + * Algorithm: + * Linear search. Checks each element of array[] to see if it + * contains the blockno-th filesystem block. This is necessary + * because the block runs map variable amounts of data. Simply + * keeps a count of the number of blocks searched so far (sum), + * incrementing this by the length of each block run as we come + * across it. Adds sum to *count before returning (this is so + * you can search multiple arrays that are logicaly one array, + * as in the indirect region code). + * + * When/if blockno is found, if blockno is inside of a block + * run as stored on disk, we offset the start and length members + * of the block run, so that blockno is the start and len is + * still valid (the run ends in the same place). + */ static int befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data, befs_blocknr_t blockno, befs_block_run * run) @@ -273,29 +278,28 @@ befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data, return BEFS_ERR; } -/* - Finds the block run that starts at file block number blockno - in the file represented by the datastream data, if that - blockno is in the indirect region of the datastream. - - sb: the superblock - data: the datastream - blockno: the blocknumber to find - run: The found run is passed back through this pointer - - Return value is BEFS_OK if the blockrun is found, BEFS_ERR - otherwise. 
- - Algorithm: - For each block in the indirect run of the datastream, read - it in and search through it for search_blk. - - XXX: - Really should check to make sure blockno is inside indirect - region. - - 2001-11-15 Will Dyson -*/ +/** + * befs_find_brun_indirect - find a block run in the datastream + * @sb: the superblock + * @data: the datastream + * @blockno: the blocknumber to find + * @run: The found run is passed back through this pointer + * + * Finds the block run that starts at file block number blockno + * in the file represented by the datastream data, if that + * blockno is in the indirect region of the datastream. + * + * Return value is
[PATCH 3/3] befs: befs: fix style issues in datastream.c
Fixing the following checkpatch.pl errors: ERROR: "foo * bar" should be "foo *bar" +befs_blocknr_t blockno, befs_block_run * run); WARNING: Missing a blank line after declarations + struct buffer_head *bh; + befs_debug(sb, "---> %s length: %llu", __func__, len); WARNING: Block comments use * on subsequent lines + /* + Double indir block, plus all the indirect blocks it maps. (and other instances of these) Signed-off-by: Luis de Bethencourt--- fs/befs/datastream.c | 32 +--- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index 5ce85cf..b4c7ba0 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -22,17 +22,17 @@ const befs_inode_addr BAD_IADDR = { 0, 0, 0 }; static int befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data, -befs_blocknr_t blockno, befs_block_run * run); +befs_blocknr_t blockno, befs_block_run *run); static int befs_find_brun_indirect(struct super_block *sb, const befs_data_stream *data, befs_blocknr_t blockno, - befs_block_run * run); + befs_block_run *run); static int befs_find_brun_dblindirect(struct super_block *sb, const befs_data_stream *data, befs_blocknr_t blockno, - befs_block_run * run); + befs_block_run *run); /** * befs_read_datastream - get buffer_head containing data, starting from pos. 
@@ -46,7 +46,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb, */ struct buffer_head * befs_read_datastream(struct super_block *sb, const befs_data_stream *ds, -befs_off_t pos, uint * off) +befs_off_t pos, uint *off) { struct buffer_head *bh; befs_block_run run; @@ -94,7 +94,7 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds, */ int befs_fblock2brun(struct super_block *sb, const befs_data_stream *data, -befs_blocknr_t fblock, befs_block_run * run) +befs_blocknr_t fblock, befs_block_run *run) { int err; befs_off_t pos = fblock << BEFS_SB(sb)->block_shift; @@ -134,6 +134,7 @@ befs_read_lsymlink(struct super_block *sb, const befs_data_stream *ds, befs_off_t bytes_read = 0; /* bytes readed */ u16 plen; struct buffer_head *bh; + befs_debug(sb, "---> %s length: %llu", __func__, len); while (bytes_read < len) { @@ -189,13 +190,13 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds) metablocks += ds->indirect.len; /* - Double indir block, plus all the indirect blocks it maps. - In the double-indirect range, all block runs of data are - BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know - how many data block runs are in the double-indirect region, - and from that we know how many indirect blocks it takes to - map them. We assume that the indirect blocks are also - BEFS_DBLINDIR_BRUN_LEN blocks long. +* Double indir block, plus all the indirect blocks it maps. +* In the double-indirect range, all block runs of data are +* BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know +* how many data block runs are in the double-indirect region, +* and from that we know how many indirect blocks it takes to +* map them. We assume that the indirect blocks are also +* BEFS_DBLINDIR_BRUN_LEN blocks long. 
*/ if (ds->size > ds->max_indirect_range && ds->max_indirect_range != 0) { uint dbl_bytes; @@ -249,7 +250,7 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds) */ static int befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data, - befs_blocknr_t blockno, befs_block_run * run) + befs_blocknr_t blockno, befs_block_run *run) { int i; const befs_block_run *array = data->direct; @@ -261,6 +262,7 @@ befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data, sum += array[i].len, i++) { if (blockno >= sum && blockno < sum + (array[i].len)) { int offset = blockno - sum; + run->allocation_group = array[i].allocation_group; run->start = array[i].start + offset; run->len = array[i].len - offset; @@ -304,7 +306,7 @@ static int befs_find_brun_indirect(struct super_block *sb,
[PATCH 3/3] befs: befs: fix style issues in datastream.c
Fixing the following checkpatch.pl errors: ERROR: "foo * bar" should be "foo *bar" +befs_blocknr_t blockno, befs_block_run * run); WARNING: Missing a blank line after declarations + struct buffer_head *bh; + befs_debug(sb, "---> %s length: %llu", __func__, len); WARNING: Block comments use * on subsequent lines + /* + Double indir block, plus all the indirect blocks it maps. (and other instances of these) Signed-off-by: Luis de Bethencourt --- fs/befs/datastream.c | 32 +--- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index 5ce85cf..b4c7ba0 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -22,17 +22,17 @@ const befs_inode_addr BAD_IADDR = { 0, 0, 0 }; static int befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data, -befs_blocknr_t blockno, befs_block_run * run); +befs_blocknr_t blockno, befs_block_run *run); static int befs_find_brun_indirect(struct super_block *sb, const befs_data_stream *data, befs_blocknr_t blockno, - befs_block_run * run); + befs_block_run *run); static int befs_find_brun_dblindirect(struct super_block *sb, const befs_data_stream *data, befs_blocknr_t blockno, - befs_block_run * run); + befs_block_run *run); /** * befs_read_datastream - get buffer_head containing data, starting from pos. 
@@ -46,7 +46,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb, */ struct buffer_head * befs_read_datastream(struct super_block *sb, const befs_data_stream *ds, -befs_off_t pos, uint * off) +befs_off_t pos, uint *off) { struct buffer_head *bh; befs_block_run run; @@ -94,7 +94,7 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds, */ int befs_fblock2brun(struct super_block *sb, const befs_data_stream *data, -befs_blocknr_t fblock, befs_block_run * run) +befs_blocknr_t fblock, befs_block_run *run) { int err; befs_off_t pos = fblock << BEFS_SB(sb)->block_shift; @@ -134,6 +134,7 @@ befs_read_lsymlink(struct super_block *sb, const befs_data_stream *ds, befs_off_t bytes_read = 0; /* bytes readed */ u16 plen; struct buffer_head *bh; + befs_debug(sb, "---> %s length: %llu", __func__, len); while (bytes_read < len) { @@ -189,13 +190,13 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds) metablocks += ds->indirect.len; /* - Double indir block, plus all the indirect blocks it maps. - In the double-indirect range, all block runs of data are - BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know - how many data block runs are in the double-indirect region, - and from that we know how many indirect blocks it takes to - map them. We assume that the indirect blocks are also - BEFS_DBLINDIR_BRUN_LEN blocks long. +* Double indir block, plus all the indirect blocks it maps. +* In the double-indirect range, all block runs of data are +* BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know +* how many data block runs are in the double-indirect region, +* and from that we know how many indirect blocks it takes to +* map them. We assume that the indirect blocks are also +* BEFS_DBLINDIR_BRUN_LEN blocks long. 
*/ if (ds->size > ds->max_indirect_range && ds->max_indirect_range != 0) { uint dbl_bytes; @@ -249,7 +250,7 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds) */ static int befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data, - befs_blocknr_t blockno, befs_block_run * run) + befs_blocknr_t blockno, befs_block_run *run) { int i; const befs_block_run *array = data->direct; @@ -261,6 +262,7 @@ befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data, sum += array[i].len, i++) { if (blockno >= sum && blockno < sum + (array[i].len)) { int offset = blockno - sum; + run->allocation_group = array[i].allocation_group; run->start = array[i].start + offset; run->len = array[i].len - offset; @@ -304,7 +306,7 @@ static int befs_find_brun_indirect(struct super_block *sb, const
Re: [PATCH] sched: Avoid that __wait_on_bit_lock() hangs
Forgot to mention... On 08/12, Bart Van Assche wrote: > > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -1643,7 +1643,12 @@ find_page: >* wait_on_page_locked is used to avoid unnecessarily >* serialisations and why it's safe. >*/ > - wait_on_page_locked_killable(page); > + error = wait_on_page_locked_killable(page); > + if (error == -EINTR) { > + put_page(page); > + goto out; > + } > + error = 0; This change probably makes sense regardless although I'd suggest to simplify it: - wait_on_page_locked_killable(page); + error = wait_on_page_locked_killable(page); + if (unlikely(error)) + goto readpage_error; but it looks off-topic. And the changelog looks misleading/wrong. I do not think this change makes sense in this debugging session, Oleg.
Re: [PATCH] sched: Avoid that __wait_on_bit_lock() hangs
Forgot to mention... On 08/12, Bart Van Assche wrote: > > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -1643,7 +1643,12 @@ find_page: >* wait_on_page_locked is used to avoid unnecessarily >* serialisations and why it's safe. >*/ > - wait_on_page_locked_killable(page); > + error = wait_on_page_locked_killable(page); > + if (error == -EINTR) { > + put_page(page); > + goto out; > + } > + error = 0; This change probably makes sense regardless although I'd suggest to simplify it: - wait_on_page_locked_killable(page); + error = wait_on_page_locked_killable(page); + if (unlikely(error)) + goto readpage_error; but it looks off-topic. And the changelog looks misleading/wrong. I do not think this change makes sense in this debugging session, Oleg.
Re: [PATCH 2/2 v3] be2iscsi: Fix some error messages
On Sat, 2016-08-13 at 09:41 -0700, Joe Perches wrote: > On Sat, 2016-08-13 at 14:31 +0200, Christophe JAILLET wrote: > > Le 13/08/2016 à 13:35, Joe Perches a écrit : > > > > @@ -268,7 +268,7 @@ static int beiscsi_eh_abort(struct scsi_cmnd *sc) > > > > _cmd.dma); > > > > if (nonemb_cmd.va == NULL) { > > > > beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH, > > > > - "BM_%d : Failed to allocate memory for" > > > > + "BM_%d : Failed to allocate memory for " > > > > "mgmt_invalidate_icds\n"); This is the first time I've looked at the beiscsi_log macro. It sure is odd and undesirable. It's _very_ not nice to have a format string take an implied __LINE__ argument. It'd be much more intelligible to take the first bit as a separate string, concatenate it in the macro with "_%d: " and __LINE__ (if that's really useful, I think it's not) and emit that as the format. Something like: diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h index 30a4606..3f0fbbf 100644 --- a/drivers/scsi/be2iscsi/be_main.h +++ b/drivers/scsi/be2iscsi/be_main.h @@ -1084,11 +1084,12 @@ struct hwi_context_memory { #define __beiscsi_log(phba, level, fmt, arg...) \ shost_printk(level, phba->shost, fmt, __LINE__, ##arg) -#define beiscsi_log(phba, level, mask, fmt, arg...) \ -do { \ - uint32_t log_value = phba->attr_log_enable; \ - if (((mask) & log_value) || (level[1] <= '3')) \ - __beiscsi_log(phba, level, fmt, ##arg); \ -} while (0); +#define beiscsi_log(phba, level, mask, prefix, fmt, ...) \ +do { \ + uint32_t log_value = phba->attr_log_enable; \ + if (((mask) & log_value) || (level[1] <= '3')) \ + __beiscsi_log(phba, level, prefix "_%d: " fmt, \ + ##__VA_ARGS__); \ +} while (0) #endif So these beiscsi_log uses become something like: beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH, "BM", "Failed to allocate memory for mgmt_invalidate_icds\n"); and the format and its arguments match.
Re: [PATCH 2/2 v3] be2iscsi: Fix some error messages
On Sat, 2016-08-13 at 09:41 -0700, Joe Perches wrote: > On Sat, 2016-08-13 at 14:31 +0200, Christophe JAILLET wrote: > > Le 13/08/2016 à 13:35, Joe Perches a écrit : > > > > @@ -268,7 +268,7 @@ static int beiscsi_eh_abort(struct scsi_cmnd *sc) > > > > &nonemb_cmd.dma); > > > > if (nonemb_cmd.va == NULL) { > > > > beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH, > > > > - "BM_%d : Failed to allocate memory for" > > > > + "BM_%d : Failed to allocate memory for " > > > > "mgmt_invalidate_icds\n"); This is the first time I've looked at the beiscsi_log macro. It sure is odd and undesirable. It's _very_ not nice to have a format string take an implied __LINE__ argument. It'd be much more intelligible to take the first bit as a separate string, concatenate it in the macro with "_%d: " and __LINE__ (if that's really useful, I think it's not) and emit that as the format. Something like: diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h index 30a4606..3f0fbbf 100644 --- a/drivers/scsi/be2iscsi/be_main.h +++ b/drivers/scsi/be2iscsi/be_main.h @@ -1084,11 +1084,12 @@ struct hwi_context_memory { #define __beiscsi_log(phba, level, fmt, arg...) \ shost_printk(level, phba->shost, fmt, __LINE__, ##arg) -#define beiscsi_log(phba, level, mask, fmt, arg...) \ -do { \ - uint32_t log_value = phba->attr_log_enable; \ - if (((mask) & log_value) || (level[1] <= '3')) \ - __beiscsi_log(phba, level, fmt, ##arg); \ -} while (0); +#define beiscsi_log(phba, level, mask, prefix, fmt, ...) \ +do { \ + uint32_t log_value = phba->attr_log_enable; \ + if (((mask) & log_value) || (level[1] <= '3')) \ + __beiscsi_log(phba, level, prefix "_%d: " fmt, \ + ##__VA_ARGS__); \ +} while (0) #endif So these beiscsi_log uses become something like: beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH, "BM", "Failed to allocate memory for mgmt_invalidate_icds\n"); and the format and its arguments match.
[PATCH] android: binder: Remove deprecated create_singlethread_workqueue
The workqueue is being used to run deferred work for the android binder. The "binder_deferred_workqueue" queues only a single work item and hence does not require ordering. Also, this workqueue is not being used on a memory reclaim path. Hence, the singlethreaded workqueue has been replaced with the use of system_wq. System workqueues have been able to handle high level of concurrency for a long time now and hence it's not required to have a singlethreaded workqueue just to gain concurrency. Unlike a dedicated per-cpu workqueue created with create_singlethread_workqueue(), system_wq allows multiple work items to overlap executions even on the same CPU; however, a per-cpu workqueue doesn't have any CPU locality or global ordering guarantee unless the target CPU is explicitly specified and thus the increase of local concurrency shouldn't make any difference. Signed-off-by: Bhaktipriya Shridhar --- drivers/android/binder.c | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 16288e7..562af94 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -59,7 +59,6 @@ static struct dentry *binder_debugfs_dir_entry_proc; static struct binder_node *binder_context_mgr_node; static kuid_t binder_context_mgr_uid = INVALID_UID; static int binder_last_id; -static struct workqueue_struct *binder_deferred_workqueue; #define BINDER_DEBUG_ENTRY(name) \ static int binder_##name##_open(struct inode *inode, struct file *file) \ @@ -3227,7 +3226,7 @@ binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer) if (hlist_unhashed(&proc->deferred_work_node)) { hlist_add_head(&proc->deferred_work_node, &binder_deferred_list); - queue_work(binder_deferred_workqueue, &binder_deferred_work); + schedule_work(&binder_deferred_work); } mutex_unlock(&binder_deferred_lock); } @@ -3679,10 +3678,6 @@ static int __init binder_init(void) { int ret; - binder_deferred_workqueue = create_singlethread_workqueue("binder"); - if 
(!binder_deferred_workqueue) - return -ENOMEM; - binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL); if (binder_debugfs_dir_entry_root) binder_debugfs_dir_entry_proc = debugfs_create_dir("proc", -- 2.1.4
[PATCH] android: binder: Remove deprecated create_singlethread_workqueue
The workqueue is being used to run deferred work for the android binder. The "binder_deferred_workqueue" queues only a single work item and hence does not require ordering. Also, this workqueue is not being used on a memory reclaim path. Hence, the singlethreaded workqueue has been replaced with the use of system_wq. System workqueues have been able to handle high level of concurrency for a long time now and hence it's not required to have a singlethreaded workqueue just to gain concurrency. Unlike a dedicated per-cpu workqueue created with create_singlethread_workqueue(), system_wq allows multiple work items to overlap executions even on the same CPU; however, a per-cpu workqueue doesn't have any CPU locality or global ordering guarantee unless the target CPU is explicitly specified and thus the increase of local concurrency shouldn't make any difference. Signed-off-by: Bhaktipriya Shridhar --- drivers/android/binder.c | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 16288e7..562af94 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -59,7 +59,6 @@ static struct dentry *binder_debugfs_dir_entry_proc; static struct binder_node *binder_context_mgr_node; static kuid_t binder_context_mgr_uid = INVALID_UID; static int binder_last_id; -static struct workqueue_struct *binder_deferred_workqueue; #define BINDER_DEBUG_ENTRY(name) \ static int binder_##name##_open(struct inode *inode, struct file *file) \ @@ -3227,7 +3226,7 @@ binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer) if (hlist_unhashed(&proc->deferred_work_node)) { hlist_add_head(&proc->deferred_work_node, &binder_deferred_list); - queue_work(binder_deferred_workqueue, &binder_deferred_work); + schedule_work(&binder_deferred_work); } mutex_unlock(&binder_deferred_lock); } @@ -3679,10 +3678,6 @@ static int __init binder_init(void) { int ret; - binder_deferred_workqueue = create_singlethread_workqueue("binder"); - if 
(!binder_deferred_workqueue) - return -ENOMEM; - binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL); if (binder_debugfs_dir_entry_root) binder_debugfs_dir_entry_proc = debugfs_create_dir("proc", -- 2.1.4
Re: [PATCH 2/2 v3] be2iscsi: Fix some error messages
On Sat, 2016-08-13 at 14:31 +0200, Christophe JAILLET wrote: > Le 13/08/2016 à 13:35, Joe Perches a écrit : > > > @@ -268,7 +268,7 @@ static int beiscsi_eh_abort(struct scsi_cmnd *sc) > > > &nonemb_cmd.dma); > > > if (nonemb_cmd.va == NULL) { > > > beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH, > > > - "BM_%d : Failed to allocate memory for" > > > + "BM_%d : Failed to allocate memory for " > > > "mgmt_invalidate_icds\n"); > > doesn't match commit log as no coalescing/concatenation > > is done. > > > > There are many of these. > > > I have *only* fixed the one reported by checkpatch and left the others > unchanged. > > My initial proposal was to fix incorrect strings, without modifying too > much the code. So I decided to do the minimum of changes. > > Should I resubmit with: > - all strings *in the patch* concatenated? > - all strings *in the file* concatenated? Hello Christophe You don't _have_ to do anything. I think the commit message is misleading. You could submit another patch that does the equivalent of: $ ./scripts/checkpatch.pl --types=SPLIT_STRING --fix-inplace drivers/scsi/be2iscsi/be_main.c with the appropriate commit message
Re: [PATCH 2/2 v3] be2iscsi: Fix some error messages
On Sat, 2016-08-13 at 14:31 +0200, Christophe JAILLET wrote: > Le 13/08/2016 à 13:35, Joe Perches a écrit : > > > @@ -268,7 +268,7 @@ static int beiscsi_eh_abort(struct scsi_cmnd *sc) > > > &nonemb_cmd.dma); > > > if (nonemb_cmd.va == NULL) { > > > beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_EH, > > > - "BM_%d : Failed to allocate memory for" > > > + "BM_%d : Failed to allocate memory for " > > > "mgmt_invalidate_icds\n"); > > doesn't match commit log as no coalescing/concatenation > > is done. > > > > There are many of these. > > > I have *only* fixed the one reported by checkpatch and left the others > unchanged. > > My initial proposal was to fix incorrect strings, without modifying too > much the code. So I decided to do the minimum of changes. > > Should I resubmit with: > - all strings *in the patch* concatenated? > - all strings *in the file* concatenated? Hello Christophe You don't _have_ to do anything. I think the commit message is misleading. You could submit another patch that does the equivalent of: $ ./scripts/checkpatch.pl --types=SPLIT_STRING --fix-inplace drivers/scsi/be2iscsi/be_main.c with the appropriate commit message
[PATCH] edac: wq: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. wq has been identity converted since it is used to detect things like ECC memory errors. It's not recommended to stall it from memory pressure. Hence, WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar --- drivers/edac/wq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/wq.c b/drivers/edac/wq.c index 1b8c07e..2a9a11a 100644 --- a/drivers/edac/wq.c +++ b/drivers/edac/wq.c @@ -27,7 +27,7 @@ EXPORT_SYMBOL_GPL(edac_stop_work); int edac_workqueue_setup(void) { - wq = create_singlethread_workqueue("edac-poller"); + wq = alloc_ordered_workqueue("edac-poller", WQ_MEM_RECLAIM); if (!wq) return -ENODEV; else -- 2.1.4
[PATCH] edac: wq: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. wq has been identity converted since it is used to detect things like ECC memory errors. It's not recommended to stall it from memory pressure. Hence, WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar --- drivers/edac/wq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/wq.c b/drivers/edac/wq.c index 1b8c07e..2a9a11a 100644 --- a/drivers/edac/wq.c +++ b/drivers/edac/wq.c @@ -27,7 +27,7 @@ EXPORT_SYMBOL_GPL(edac_stop_work); int edac_workqueue_setup(void) { - wq = create_singlethread_workqueue("edac-poller"); + wq = alloc_ordered_workqueue("edac-poller", WQ_MEM_RECLAIM); if (!wq) return -ENODEV; else -- 2.1.4
[PATCH v3 6/7] x86: Fix thread_saved_pc()
thread_saved_pc() was using a completely bogus method to get the return address. Since switch_to() was previously inlined, there was no sane way to know where on the stack the return address was stored. Now with the frame of a sleeping thread well defined, this can be implemented correctly. Signed-off-by: Brian Gerst--- arch/x86/include/asm/processor.h | 10 ++ arch/x86/kernel/process.c| 11 +++ arch/x86/kernel/process_32.c | 8 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6fee863..b22fb5a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -721,8 +721,6 @@ static inline void spin_lock_prefetch(const void *x) .addr_limit = KERNEL_DS, \ } -extern unsigned long thread_saved_pc(struct task_struct *tsk); - /* * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. * This is necessary to guarantee that the entire "struct pt_regs" @@ -773,17 +771,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); .addr_limit = KERNEL_DS,\ } -/* - * Return saved PC of a blocked thread. - * What is this good for? it will be always the scheduler or ret_from_fork. - */ -#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8)) - #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ +extern unsigned long thread_saved_pc(struct task_struct *tsk); + extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0115a4a..c1fa790 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -514,6 +514,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } /* + * Return saved PC of a blocked thread. + * What is this good for? it will be always the scheduler or ret_from_fork. 
+ */ +unsigned long thread_saved_pc(struct task_struct *tsk) +{ + struct inactive_task_frame *frame = + (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp); + return READ_ONCE_NOCHECK(frame->ret_addr); +} + +/* * Called from fs/proc with a reference on @p to find the function * which called into schedule(). This needs to be done carefully * because the task might wake up and we might look at a stack diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 18714a1..404efdf 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,14 +55,6 @@ #include #include -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((unsigned long *)tsk->thread.sp)[3]; -} - void __show_regs(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; -- 2.5.5
[PATCH v3 6/7] x86: Fix thread_saved_pc()
thread_saved_pc() was using a completely bogus method to get the return address. Since switch_to() was previously inlined, there was no sane way to know where on the stack the return address was stored. Now with the frame of a sleeping thread well defined, this can be implemented correctly. Signed-off-by: Brian Gerst --- arch/x86/include/asm/processor.h | 10 ++ arch/x86/kernel/process.c| 11 +++ arch/x86/kernel/process_32.c | 8 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6fee863..b22fb5a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -721,8 +721,6 @@ static inline void spin_lock_prefetch(const void *x) .addr_limit = KERNEL_DS, \ } -extern unsigned long thread_saved_pc(struct task_struct *tsk); - /* * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. * This is necessary to guarantee that the entire "struct pt_regs" @@ -773,17 +771,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); .addr_limit = KERNEL_DS,\ } -/* - * Return saved PC of a blocked thread. - * What is this good for? it will be always the scheduler or ret_from_fork. - */ -#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8)) - #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ +extern unsigned long thread_saved_pc(struct task_struct *tsk); + extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0115a4a..c1fa790 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -514,6 +514,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } /* + * Return saved PC of a blocked thread. + * What is this good for? it will be always the scheduler or ret_from_fork. 
+ */ +unsigned long thread_saved_pc(struct task_struct *tsk) +{ + struct inactive_task_frame *frame = + (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp); + return READ_ONCE_NOCHECK(frame->ret_addr); +} + +/* * Called from fs/proc with a reference on @p to find the function * which called into schedule(). This needs to be done carefully * because the task might wake up and we might look at a stack diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 18714a1..404efdf 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,14 +55,6 @@ #include #include -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((unsigned long *)tsk->thread.sp)[3]; -} - void __show_regs(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; -- 2.5.5
[PATCH v3 3/7] x86: Add struct inactive_task_frame
Add struct inactive_task_frame, which defines the layout of the stack for a sleeping process. For now, the only defined field is the BP register (frame pointer). Signed-off-by: Brian Gerst--- arch/x86/include/asm/stacktrace.h | 4 ++-- arch/x86/include/asm/switch_to.h | 5 + arch/x86/kernel/kgdb.c| 3 ++- arch/x86/kernel/process.c | 3 ++- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 0944218..7646fb2 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -8,6 +8,7 @@ #include #include +#include extern int kstack_depth_to_print; @@ -70,8 +71,7 @@ stack_frame(struct task_struct *task, struct pt_regs *regs) return bp; } - /* bp is the last reg pushed by switch_to */ - return *(unsigned long *)task->thread.sp; + return ((struct inactive_task_frame *)task->thread.sp)->bp; } #else static inline unsigned long diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 8f321a1..02de86e 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -8,6 +8,11 @@ struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); +/* data that is pointed to by thread.sp */ +struct inactive_task_frame { + unsigned long bp; +}; + #ifdef CONFIG_X86_32 #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 5e3f294..8e36f24 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -50,6 +50,7 @@ #include #include #include +#include struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -166,7 +167,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_DX]= 0; gdb_regs[GDB_SI]= 0; gdb_regs[GDB_DI]= 0; - gdb_regs[GDB_BP]= *(unsigned long *)p->thread.sp; + gdb_regs[GDB_BP]= ((struct inactive_task_frame *)p->thread.sp)->bp; #ifdef CONFIG_X86_32 
gdb_regs[GDB_DS]= __KERNEL_DS; gdb_regs[GDB_ES]= __KERNEL_DS; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..0115a4a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -556,7 +557,7 @@ unsigned long get_wchan(struct task_struct *p) if (sp < bottom || sp > top) return 0; - fp = READ_ONCE_NOCHECK(*(unsigned long *)sp); + fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp); do { if (fp < bottom || fp > top) return 0; -- 2.5.5
[PATCH v3 4/7] x86: Rewrite switch_to() code
Move the low-level context switch code to an out-of-line asm stub instead of using complex inline asm. This allows constructing a new stack frame for the child process to make it seamlessly flow to ret_from_fork without an extra test and branch in __switch_to(). It also improves code generation for __schedule() by using the C calling convention instead of clobbering all registers. Signed-off-by: Brian Gerst--- arch/x86/entry/entry_32.S | 37 ++ arch/x86/entry/entry_64.S | 41 ++- arch/x86/include/asm/processor.h | 3 - arch/x86/include/asm/switch_to.h | 137 ++--- arch/x86/include/asm/thread_info.h | 2 - arch/x86/kernel/asm-offsets.c | 6 ++ arch/x86/kernel/asm-offsets_32.c | 5 ++ arch/x86/kernel/asm-offsets_64.c | 5 ++ arch/x86/kernel/process_32.c | 9 ++- arch/x86/kernel/process_64.c | 9 ++- arch/x86/kernel/smpboot.c | 1 - 11 files changed, 125 insertions(+), 130 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 0b5..bf8f221 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -204,6 +204,43 @@ POP_GS_EX .endm +/* + * %eax: prev task + * %edx: next task + */ +ENTRY(__switch_to_asm) + /* +* Save callee-saved registers +* This must match the order in struct inactive_task_frame +*/ + pushl %ebp + pushl %ebx + pushl %edi + pushl %esi + + /* switch stack */ + movl%esp, TASK_threadsp(%eax) + movlTASK_threadsp(%edx), %esp + +#ifdef CONFIG_CC_STACKPROTECTOR + movlTASK_stack_canary(%edx), %ebx + movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset +#endif + + /* restore callee-saved registers */ + popl%esi + popl%edi + popl%ebx + popl%ebp + + jmp __switch_to +END(__switch_to_asm) + +/* + * A newly forked process directly context switches into this address. 
+ * + * eax: prev task we switched from + */ ENTRY(ret_from_fork) pushl %eax callschedule_tail diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f6b40e5..c1af8ac 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -368,13 +368,48 @@ END(ptregs_\func) #include /* + * %rdi: prev task + * %rsi: next task + */ +ENTRY(__switch_to_asm) + /* +* Save callee-saved registers +* This must match the order in inactive_task_frame +*/ + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + /* switch stack */ + movq%rsp, TASK_threadsp(%rdi) + movqTASK_threadsp(%rsi), %rsp + +#ifdef CONFIG_CC_STACKPROTECTOR + movqTASK_stack_canary(%rsi), %rbx + movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset +#endif + + /* restore callee-saved registers */ + popq%r15 + popq%r14 + popq%r13 + popq%r12 + popq%rbx + popq%rbp + + jmp __switch_to +END(__switch_to_asm) + +/* * A newly forked process directly context switches into this address. * - * rdi: prev task we switched from + * rax: prev task we switched from */ ENTRY(ret_from_fork) - LOCK ; btr $TIF_FORK, TI_flags(%r8) - + movq%rax, %rdi callschedule_tail /* rdi: 'prev' task parameter */ testb $3, CS(%rsp)/* from kernel_thread? 
*/ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 63def95..6fee863 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -389,9 +389,6 @@ struct thread_struct { unsigned short fsindex; unsigned short gsindex; #endif -#ifdef CONFIG_X86_32 - unsigned long ip; -#endif #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 02de86e..bf4e2ec 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -2,135 +2,40 @@ #define _ASM_X86_SWITCH_TO_H struct task_struct; /* one of the stranger aspects of C forward declarations */ + +struct task_struct *__switch_to_asm(struct task_struct *prev, + struct task_struct *next); + __visible struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *next); + struct task_struct *next); struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); /* data that is pointed to by thread.sp */ struct inactive_task_frame {
[PATCH v3 5/7] x86: Pass kernel thread parameters in fork_frame
Instead of setting up a fake pt_regs context, put the kernel thread function pointer and arg into the unused callee-restored registers of struct fork_frame. Signed-off-by: Brian Gerst--- arch/x86/entry/entry_32.S| 31 +++ arch/x86/entry/entry_64.S| 37 + arch/x86/include/asm/switch_to.h | 2 ++ arch/x86/kernel/process_32.c | 18 -- arch/x86/kernel/process_64.c | 12 +++- 5 files changed, 41 insertions(+), 59 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bf8f221..b75a8bc 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -240,35 +240,34 @@ END(__switch_to_asm) * A newly forked process directly context switches into this address. * * eax: prev task we switched from + * ebx: kernel thread func (NULL for user thread) + * edi: kernel thread arg */ ENTRY(ret_from_fork) pushl %eax callschedule_tail popl%eax + testl %ebx, %ebx + jnz 1f /* kernel threads are uncommon */ + +2: /* When we fork, we trace the syscall return in the child, too. */ movl%esp, %eax callsyscall_return_slowpath jmp restore_all -END(ret_from_fork) - -ENTRY(ret_from_kernel_thread) - pushl %eax - callschedule_tail - popl%eax - movlPT_EBP(%esp), %eax - call*PT_EBX(%esp) - movl$0, PT_EAX(%esp) + /* kernel thread */ +1: movl%edi, %eax + call*%ebx /* -* Kernel threads return to userspace as if returning from a syscall. -* We should check whether anything actually uses this path and, if so, -* consider switching it over to ret_from_fork. +* A kernel thread is allowed to return here after successfully +* calling do_execve(). Exit to userspace to complete the execve() +* syscall. 
*/ - movl%esp, %eax - callsyscall_return_slowpath - jmp restore_all -ENDPROC(ret_from_kernel_thread) + movl$0, PT_EAX(%esp) + jmp 2b +END(ret_from_fork) /* * Return to user mode is not as complex as all this looks, diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c1af8ac..c0373d6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -407,37 +407,34 @@ END(__switch_to_asm) * A newly forked process directly context switches into this address. * * rax: prev task we switched from + * rbx: kernel thread func (NULL for user thread) + * r12: kernel thread arg */ ENTRY(ret_from_fork) movq%rax, %rdi callschedule_tail /* rdi: 'prev' task parameter */ - testb $3, CS(%rsp)/* from kernel_thread? */ - jnz 1f - - /* -* We came from kernel_thread. This code path is quite twisted, and -* someone should clean it up. -* -* copy_thread_tls stashes the function pointer in RBX and the -* parameter to be passed in RBP. The called function is permitted -* to call do_execve and thereby jump to user mode. -*/ - movqRBP(%rsp), %rdi - call*RBX(%rsp) - movl$0, RAX(%rsp) - - /* -* Fall through as though we're exiting a syscall. This makes a -* twisted sort of sense if we just called do_execve. -*/ + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ -1: +2: movq%rsp, %rdi callsyscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS jmp restore_regs_and_iret + +1: + /* kernel thread */ + movq%r12, %rdi + call*%rbx + /* +* A kernel thread is allowed to return here after successfully +* calling do_execve(). Exit to userspace to complete the execve() +* syscall. 
+*/ + movq$0, RAX(%rsp) + jmp 2b END(ret_from_fork) /* diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index bf4e2ec..33fb765 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -12,6 +12,8 @@ struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); +asmlinkage void ret_from_fork(void); + /* data that is pointed to by thread.sp */ struct inactive_task_frame { #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4bedbc0..18714a1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,9 +55,6 @@ #include #include -asmlinkage
[PATCH v3 7/7] Revert "sched: Mark __schedule() stack frame as non-standard"
Now that the x86 switch_to() uses the standard C calling convention, STACK_FRAME_NON_STANDARD is no longer needed. Suggested-by: Josh Poimboeuf Signed-off-by: Brian Gerst --- kernel/sched/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3b6b23c..dbf73db 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3384,7 +3384,6 @@ static void __sched notrace __schedule(bool preempt) balance_callback(rq); } -STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */ static inline void sched_submit_work(struct task_struct *tsk) { -- 2.5.5
[PATCH v3 3/7] x86: Add struct inactive_task_frame
Add struct inactive_task_frame, which defines the layout of the stack for a sleeping process. For now, the only defined field is the BP register (frame pointer). Signed-off-by: Brian Gerst --- arch/x86/include/asm/stacktrace.h | 4 ++-- arch/x86/include/asm/switch_to.h | 5 + arch/x86/kernel/kgdb.c| 3 ++- arch/x86/kernel/process.c | 3 ++- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 0944218..7646fb2 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -8,6 +8,7 @@ #include #include +#include extern int kstack_depth_to_print; @@ -70,8 +71,7 @@ stack_frame(struct task_struct *task, struct pt_regs *regs) return bp; } - /* bp is the last reg pushed by switch_to */ - return *(unsigned long *)task->thread.sp; + return ((struct inactive_task_frame *)task->thread.sp)->bp; } #else static inline unsigned long diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 8f321a1..02de86e 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -8,6 +8,11 @@ struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); +/* data that is pointed to by thread.sp */ +struct inactive_task_frame { + unsigned long bp; +}; + #ifdef CONFIG_X86_32 #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 5e3f294..8e36f24 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -50,6 +50,7 @@ #include #include #include +#include struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -166,7 +167,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_DX]= 0; gdb_regs[GDB_SI]= 0; gdb_regs[GDB_DI]= 0; - gdb_regs[GDB_BP]= *(unsigned long *)p->thread.sp; + gdb_regs[GDB_BP]= ((struct inactive_task_frame *)p->thread.sp)->bp; #ifdef CONFIG_X86_32 
gdb_regs[GDB_DS]= __KERNEL_DS; gdb_regs[GDB_ES]= __KERNEL_DS; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0e..0115a4a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -556,7 +557,7 @@ unsigned long get_wchan(struct task_struct *p) if (sp < bottom || sp > top) return 0; - fp = READ_ONCE_NOCHECK(*(unsigned long *)sp); + fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp); do { if (fp < bottom || fp > top) return 0; -- 2.5.5
[PATCH v3 4/7] x86: Rewrite switch_to() code
Move the low-level context switch code to an out-of-line asm stub instead of using complex inline asm. This allows constructing a new stack frame for the child process to make it seamlessly flow to ret_from_fork without an extra test and branch in __switch_to(). It also improves code generation for __schedule() by using the C calling convention instead of clobbering all registers. Signed-off-by: Brian Gerst --- arch/x86/entry/entry_32.S | 37 ++ arch/x86/entry/entry_64.S | 41 ++- arch/x86/include/asm/processor.h | 3 - arch/x86/include/asm/switch_to.h | 137 ++--- arch/x86/include/asm/thread_info.h | 2 - arch/x86/kernel/asm-offsets.c | 6 ++ arch/x86/kernel/asm-offsets_32.c | 5 ++ arch/x86/kernel/asm-offsets_64.c | 5 ++ arch/x86/kernel/process_32.c | 9 ++- arch/x86/kernel/process_64.c | 9 ++- arch/x86/kernel/smpboot.c | 1 - 11 files changed, 125 insertions(+), 130 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 0b5..bf8f221 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -204,6 +204,43 @@ POP_GS_EX .endm +/* + * %eax: prev task + * %edx: next task + */ +ENTRY(__switch_to_asm) + /* +* Save callee-saved registers +* This must match the order in struct inactive_task_frame +*/ + pushl %ebp + pushl %ebx + pushl %edi + pushl %esi + + /* switch stack */ + movl%esp, TASK_threadsp(%eax) + movlTASK_threadsp(%edx), %esp + +#ifdef CONFIG_CC_STACKPROTECTOR + movlTASK_stack_canary(%edx), %ebx + movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset +#endif + + /* restore callee-saved registers */ + popl%esi + popl%edi + popl%ebx + popl%ebp + + jmp __switch_to +END(__switch_to_asm) + +/* + * A newly forked process directly context switches into this address. 
+ * + * eax: prev task we switched from + */ ENTRY(ret_from_fork) pushl %eax callschedule_tail diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f6b40e5..c1af8ac 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -368,13 +368,48 @@ END(ptregs_\func) #include /* + * %rdi: prev task + * %rsi: next task + */ +ENTRY(__switch_to_asm) + /* +* Save callee-saved registers +* This must match the order in inactive_task_frame +*/ + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + /* switch stack */ + movq%rsp, TASK_threadsp(%rdi) + movqTASK_threadsp(%rsi), %rsp + +#ifdef CONFIG_CC_STACKPROTECTOR + movqTASK_stack_canary(%rsi), %rbx + movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset +#endif + + /* restore callee-saved registers */ + popq%r15 + popq%r14 + popq%r13 + popq%r12 + popq%rbx + popq%rbp + + jmp __switch_to +END(__switch_to_asm) + +/* * A newly forked process directly context switches into this address. * - * rdi: prev task we switched from + * rax: prev task we switched from */ ENTRY(ret_from_fork) - LOCK ; btr $TIF_FORK, TI_flags(%r8) - + movq%rax, %rdi callschedule_tail /* rdi: 'prev' task parameter */ testb $3, CS(%rsp)/* from kernel_thread? 
*/ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 63def95..6fee863 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -389,9 +389,6 @@ struct thread_struct { unsigned short fsindex; unsigned short gsindex; #endif -#ifdef CONFIG_X86_32 - unsigned long ip; -#endif #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 02de86e..bf4e2ec 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -2,135 +2,40 @@ #define _ASM_X86_SWITCH_TO_H struct task_struct; /* one of the stranger aspects of C forward declarations */ + +struct task_struct *__switch_to_asm(struct task_struct *prev, + struct task_struct *next); + __visible struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *next); + struct task_struct *next); struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); /* data that is pointed to by thread.sp */ struct inactive_task_frame { +#ifdef
[PATCH v3 5/7] x86: Pass kernel thread parameters in fork_frame
Instead of setting up a fake pt_regs context, put the kernel thread function pointer and arg into the unused callee-restored registers of struct fork_frame. Signed-off-by: Brian Gerst --- arch/x86/entry/entry_32.S| 31 +++ arch/x86/entry/entry_64.S| 37 + arch/x86/include/asm/switch_to.h | 2 ++ arch/x86/kernel/process_32.c | 18 -- arch/x86/kernel/process_64.c | 12 +++- 5 files changed, 41 insertions(+), 59 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bf8f221..b75a8bc 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -240,35 +240,34 @@ END(__switch_to_asm) * A newly forked process directly context switches into this address. * * eax: prev task we switched from + * ebx: kernel thread func (NULL for user thread) + * edi: kernel thread arg */ ENTRY(ret_from_fork) pushl %eax callschedule_tail popl%eax + testl %ebx, %ebx + jnz 1f /* kernel threads are uncommon */ + +2: /* When we fork, we trace the syscall return in the child, too. */ movl%esp, %eax callsyscall_return_slowpath jmp restore_all -END(ret_from_fork) - -ENTRY(ret_from_kernel_thread) - pushl %eax - callschedule_tail - popl%eax - movlPT_EBP(%esp), %eax - call*PT_EBX(%esp) - movl$0, PT_EAX(%esp) + /* kernel thread */ +1: movl%edi, %eax + call*%ebx /* -* Kernel threads return to userspace as if returning from a syscall. -* We should check whether anything actually uses this path and, if so, -* consider switching it over to ret_from_fork. +* A kernel thread is allowed to return here after successfully +* calling do_execve(). Exit to userspace to complete the execve() +* syscall. 
*/ - movl%esp, %eax - callsyscall_return_slowpath - jmp restore_all -ENDPROC(ret_from_kernel_thread) + movl$0, PT_EAX(%esp) + jmp 2b +END(ret_from_fork) /* * Return to user mode is not as complex as all this looks, diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c1af8ac..c0373d6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -407,37 +407,34 @@ END(__switch_to_asm) * A newly forked process directly context switches into this address. * * rax: prev task we switched from + * rbx: kernel thread func (NULL for user thread) + * r12: kernel thread arg */ ENTRY(ret_from_fork) movq%rax, %rdi callschedule_tail /* rdi: 'prev' task parameter */ - testb $3, CS(%rsp)/* from kernel_thread? */ - jnz 1f - - /* -* We came from kernel_thread. This code path is quite twisted, and -* someone should clean it up. -* -* copy_thread_tls stashes the function pointer in RBX and the -* parameter to be passed in RBP. The called function is permitted -* to call do_execve and thereby jump to user mode. -*/ - movqRBP(%rsp), %rdi - call*RBX(%rsp) - movl$0, RAX(%rsp) - - /* -* Fall through as though we're exiting a syscall. This makes a -* twisted sort of sense if we just called do_execve. -*/ + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ -1: +2: movq%rsp, %rdi callsyscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS jmp restore_regs_and_iret + +1: + /* kernel thread */ + movq%r12, %rdi + call*%rbx + /* +* A kernel thread is allowed to return here after successfully +* calling do_execve(). Exit to userspace to complete the execve() +* syscall. 
+*/ + movq$0, RAX(%rsp) + jmp 2b END(ret_from_fork) /* diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index bf4e2ec..33fb765 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -12,6 +12,8 @@ struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); +asmlinkage void ret_from_fork(void); + /* data that is pointed to by thread.sp */ struct inactive_task_frame { #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4bedbc0..18714a1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,9 +55,6 @@ #include #include -asmlinkage void
[PATCH v3 7/7] Revert "sched: Mark __schedule() stack frame as non-standard"
Now that the x86 switch_to() uses the standard C calling convention, STACK_FRAME_NON_STANDARD is no longer needed. Suggested-by: Josh Poimboeuf Signed-off-by: Brian Gerst --- kernel/sched/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3b6b23c..dbf73db 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3384,7 +3384,6 @@ static void __sched notrace __schedule(bool preempt) balance_callback(rq); } -STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */ static inline void sched_submit_work(struct task_struct *tsk) { -- 2.5.5
[PATCH v3 0/7] x86: Rewrite switch_to()
This patch set simplifies the switch_to() code, by moving the stack switch code out of line into an asm stub before calling __switch_to(). This ends up being more readable, and using the C calling convention instead of clobbering all registers improves code generation. It also allows newly forked processes to construct a special stack frame to seamlessly flow to ret_from_fork, instead of using a test and branch, or an unbalanced call/ret. Changes from v2: - Updated comments around kernel threads being uncommon for fork, etc. - Removed STACK_FRAME_NON_STANDARD annotation from __schedule() per Josh Poimboeuf - A few minor cleanups added Changes from v1: - Added struct inactive_task_frame - Added comments about kernel threads returning to userspace - Cleaned up some incorrect uses of thread.sp - Rearranged inactive stack frame so that BP (frame pointer) is in the natural position right below the return address. This should take care of unwinding issues Josh raised. Brian Gerst (7): x86-32, kgdb: Don't use thread.ip in sleeping_thread_to_gdb_regs() x86-64, kgdb: clear GDB_PS on 64-bit x86: Add struct inactive_task_frame x86: Rewrite switch_to() code x86: Pass kernel thread parameters in fork_frame x86: Fix thread_saved_pc() Revert "sched: Mark __schedule() stack frame as non-standard" arch/x86/entry/entry_32.S | 68 +- arch/x86/entry/entry_64.S | 78 ++-- arch/x86/include/asm/processor.h | 13 +--- arch/x86/include/asm/stacktrace.h | 4 +- arch/x86/include/asm/switch_to.h | 144 - arch/x86/include/asm/thread_info.h | 2 - arch/x86/kernel/asm-offsets.c | 6 ++ arch/x86/kernel/asm-offsets_32.c | 5 ++ arch/x86/kernel/asm-offsets_64.c | 5 ++ arch/x86/kernel/kgdb.c | 8 +-- arch/x86/kernel/process.c | 14 +++- arch/x86/kernel/process_32.c | 31 +++- arch/x86/kernel/process_64.c | 21 +++--- arch/x86/kernel/smpboot.c | 1 - kernel/sched/core.c| 1 - 15 files changed, 190 insertions(+), 211 deletions(-) -- 2.5.5
[PATCH v3 1/7] x86-32, kgdb: Don't use thread.ip in sleeping_thread_to_gdb_regs()
Match 64-bit and set gdb_regs[GDB_PC] to zero. thread.ip is always the same point in the scheduler (except for newly forked processes), and will be removed in a future patch. Signed-off-by: Brian Gerst--- arch/x86/kernel/kgdb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 04cde52..fe649a5 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -172,7 +172,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_ES]= __KERNEL_DS; gdb_regs[GDB_PS]= 0; gdb_regs[GDB_CS]= __KERNEL_CS; - gdb_regs[GDB_PC]= p->thread.ip; gdb_regs[GDB_SS]= __KERNEL_DS; gdb_regs[GDB_FS]= 0x; gdb_regs[GDB_GS]= 0x; @@ -180,7 +179,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); gdb_regs32[GDB_CS] = __KERNEL_CS; gdb_regs32[GDB_SS] = __KERNEL_DS; - gdb_regs[GDB_PC]= 0; gdb_regs[GDB_R8]= 0; gdb_regs[GDB_R9]= 0; gdb_regs[GDB_R10] = 0; @@ -190,6 +188,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_R14] = 0; gdb_regs[GDB_R15] = 0; #endif + gdb_regs[GDB_PC]= 0; gdb_regs[GDB_SP]= p->thread.sp; } -- 2.5.5
[PATCH v3 0/7] x86: Rewrite switch_to()
This patch set simplifies the switch_to() code, by moving the stack switch code out of line into an asm stub before calling __switch_to(). This ends up being more readable, and using the C calling convention instead of clobbering all registers improves code generation. It also allows newly forked processes to construct a special stack frame to seamlessly flow to ret_from_fork, instead of using a test and branch, or an unbalanced call/ret. Changes from v2: - Updated comments around kernel threads being uncommon for fork, etc. - Removed STACK_FRAME_NON_STANDARD annotation from __schedule() per Josh Poimboeuf - A few minor cleanups added Changes from v1: - Added struct inactive_task_frame - Added comments about kernel threads returning to userspace - Cleaned up some incorrect uses of thread.sp - Rearranged inactive stack frame so that BP (frame pointer) is in the natural position right below the return address. This should take care of unwinding issues Josh raised. Brian Gerst (7): x86-32, kgdb: Don't use thread.ip in sleeping_thread_to_gdb_regs() x86-64, kgdb: clear GDB_PS on 64-bit x86: Add struct inactive_task_frame x86: Rewrite switch_to() code x86: Pass kernel thread parameters in fork_frame x86: Fix thread_saved_pc() Revert "sched: Mark __schedule() stack frame as non-standard" arch/x86/entry/entry_32.S | 68 +- arch/x86/entry/entry_64.S | 78 ++-- arch/x86/include/asm/processor.h | 13 +--- arch/x86/include/asm/stacktrace.h | 4 +- arch/x86/include/asm/switch_to.h | 144 - arch/x86/include/asm/thread_info.h | 2 - arch/x86/kernel/asm-offsets.c | 6 ++ arch/x86/kernel/asm-offsets_32.c | 5 ++ arch/x86/kernel/asm-offsets_64.c | 5 ++ arch/x86/kernel/kgdb.c | 8 +-- arch/x86/kernel/process.c | 14 +++- arch/x86/kernel/process_32.c | 31 +++- arch/x86/kernel/process_64.c | 21 +++--- arch/x86/kernel/smpboot.c | 1 - kernel/sched/core.c| 1 - 15 files changed, 190 insertions(+), 211 deletions(-) -- 2.5.5
[PATCH v3 1/7] x86-32, kgdb: Don't use thread.ip in sleeping_thread_to_gdb_regs()
Match 64-bit and set gdb_regs[GDB_PC] to zero. thread.ip is always the same point in the scheduler (except for newly forked processes), and will be removed in a future patch. Signed-off-by: Brian Gerst --- arch/x86/kernel/kgdb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 04cde52..fe649a5 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -172,7 +172,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_ES]= __KERNEL_DS; gdb_regs[GDB_PS]= 0; gdb_regs[GDB_CS]= __KERNEL_CS; - gdb_regs[GDB_PC]= p->thread.ip; gdb_regs[GDB_SS]= __KERNEL_DS; gdb_regs[GDB_FS]= 0x; gdb_regs[GDB_GS]= 0x; @@ -180,7 +179,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); gdb_regs32[GDB_CS] = __KERNEL_CS; gdb_regs32[GDB_SS] = __KERNEL_DS; - gdb_regs[GDB_PC]= 0; gdb_regs[GDB_R8]= 0; gdb_regs[GDB_R9]= 0; gdb_regs[GDB_R10] = 0; @@ -190,6 +188,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_R14] = 0; gdb_regs[GDB_R15] = 0; #endif + gdb_regs[GDB_PC]= 0; gdb_regs[GDB_SP]= p->thread.sp; } -- 2.5.5
[PATCH v3 2/7] x86-64, kgdb: clear GDB_PS on 64-bit
switch_to() no longer saves EFLAGS, so it's bogus to look for it on the stack. Set it to zero like 32-bit. Signed-off-by: Brian Gerst--- arch/x86/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index fe649a5..5e3f294 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -176,7 +176,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_FS]= 0x; gdb_regs[GDB_GS]= 0x; #else - gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); + gdb_regs32[GDB_PS] = 0; gdb_regs32[GDB_CS] = __KERNEL_CS; gdb_regs32[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_R8]= 0; -- 2.5.5
[PATCH v3 2/7] x86-64, kgdb: clear GDB_PS on 64-bit
switch_to() no longer saves EFLAGS, so it's bogus to look for it on the stack. Set it to zero like 32-bit. Signed-off-by: Brian Gerst --- arch/x86/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index fe649a5..5e3f294 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -176,7 +176,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_FS]= 0x; gdb_regs[GDB_GS]= 0x; #else - gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); + gdb_regs32[GDB_PS] = 0; gdb_regs32[GDB_CS] = __KERNEL_CS; gdb_regs32[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_R8]= 0; -- 2.5.5
[PATCH 8/8] power: ds2760_battery: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "monitor_wqueue" is used to monitor the battery status. It has been identity converted. It queues multiple work items viz &di->monitor_work, &di->set_charged_work, which require execution ordering. Hence, alloc_ordered_workqueue has been used to replace the deprecated create_singlethread_workqueue instance. WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/ds2760_battery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c index 80f73cc..ac92e80 100644 --- a/drivers/power/ds2760_battery.c +++ b/drivers/power/ds2760_battery.c @@ -566,7 +566,8 @@ static int ds2760_battery_probe(struct platform_device *pdev) INIT_DELAYED_WORK(&di->monitor_work, ds2760_battery_work); INIT_DELAYED_WORK(&di->set_charged_work, ds2760_battery_set_charged_work); - di->monitor_wqueue = create_singlethread_workqueue(dev_name(&pdev->dev)); + di->monitor_wqueue = alloc_ordered_workqueue(dev_name(&pdev->dev), +WQ_MEM_RECLAIM); if (!di->monitor_wqueue) { retval = -ESRCH; goto workqueue_failed; -- 2.1.4
[PATCH 8/8] power: ds2760_battery: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "monitor_wqueue" is used to monitor the battery status. It has been identity converted. It queues multiple work items viz &di->monitor_work, &di->set_charged_work, which require execution ordering. Hence, alloc_ordered_workqueue has been used to replace the deprecated create_singlethread_workqueue instance. WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/ds2760_battery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c index 80f73cc..ac92e80 100644 --- a/drivers/power/ds2760_battery.c +++ b/drivers/power/ds2760_battery.c @@ -566,7 +566,8 @@ static int ds2760_battery_probe(struct platform_device *pdev) INIT_DELAYED_WORK(&di->monitor_work, ds2760_battery_work); INIT_DELAYED_WORK(&di->set_charged_work, ds2760_battery_set_charged_work); - di->monitor_wqueue = create_singlethread_workqueue(dev_name(&pdev->dev)); + di->monitor_wqueue = alloc_ordered_workqueue(dev_name(&pdev->dev), +WQ_MEM_RECLAIM); if (!di->monitor_wqueue) { retval = -ESRCH; goto workqueue_failed; -- 2.1.4
[PATCH 7/8] power: ab8500_fg: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "fg_wq" is used for running the FG algorithm periodically. It has been identity converted. It has multiple work items viz fg_periodic_work, fg_low_bat_work, fg_reinit_work, fg_work, fg_acc_cur_work and fg_check_hw_failure_work, which require execution ordering. Hence, a dedicated ordered workqueue has been used here. The WQ_MEM_RECLAIM flag has been set to guarantee forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar--- drivers/power/ab8500_fg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c index 5a36cf8..199f2db 100644 --- a/drivers/power/ab8500_fg.c +++ b/drivers/power/ab8500_fg.c @@ -3096,7 +3096,7 @@ static int ab8500_fg_probe(struct platform_device *pdev) ab8500_fg_discharge_state_to(di, AB8500_FG_DISCHARGE_INIT); /* Create a work queue for running the FG algorithm */ - di->fg_wq = create_singlethread_workqueue("ab8500_fg_wq"); + di->fg_wq = alloc_ordered_workqueue("ab8500_fg_wq", WQ_MEM_RECLAIM); if (di->fg_wq == NULL) { dev_err(di->dev, "failed to create work queue\n"); return -ENOMEM; -- 2.1.4
[PATCH 7/8] power: ab8500_fg: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "fg_wq" is used for running the FG algorithm periodically. It has been identity converted. It has multiple work items viz fg_periodic_work, fg_low_bat_work, fg_reinit_work, fg_work, fg_acc_cur_work and fg_check_hw_failure_work, which require execution ordering. Hence, a dedicated ordered workqueue has been used here. The WQ_MEM_RECLAIM flag has been set to guarantee forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/ab8500_fg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c index 5a36cf8..199f2db 100644 --- a/drivers/power/ab8500_fg.c +++ b/drivers/power/ab8500_fg.c @@ -3096,7 +3096,7 @@ static int ab8500_fg_probe(struct platform_device *pdev) ab8500_fg_discharge_state_to(di, AB8500_FG_DISCHARGE_INIT); /* Create a work queue for running the FG algorithm */ - di->fg_wq = create_singlethread_workqueue("ab8500_fg_wq"); + di->fg_wq = alloc_ordered_workqueue("ab8500_fg_wq", WQ_MEM_RECLAIM); if (di->fg_wq == NULL) { dev_err(di->dev, "failed to create work queue\n"); return -ENOMEM; -- 2.1.4
[PATCH 6/8] power: ipaq_micro_battery: Remove deprecated create_singlethread_workqueue
The workqueue "wq" is used for handling battery related tasks. It has a single work item viz &mb->update and hence it doesn't require execution ordering. Hence, alloc_workqueue has been used to replace the deprecated create_singlethread_workqueue instance. The WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure. Since there is a single work item, an explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/ipaq_micro_battery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/ipaq_micro_battery.c b/drivers/power/ipaq_micro_battery.c index 35b01c7..4af7b77 100644 --- a/drivers/power/ipaq_micro_battery.c +++ b/drivers/power/ipaq_micro_battery.c @@ -235,7 +235,7 @@ static int micro_batt_probe(struct platform_device *pdev) return -ENOMEM; mb->micro = dev_get_drvdata(pdev->dev.parent); - mb->wq = create_singlethread_workqueue("ipaq-battery-wq"); + mb->wq = alloc_workqueue("ipaq-battery-wq", WQ_MEM_RECLAIM, 0); if (!mb->wq) return -ENOMEM; -- 2.1.4
[PATCH 6/8] power: ipaq_micro_battery: Remove deprecated create_singlethread_workqueue
The workqueue "wq" is used for handling battery related tasks. It has a single work item viz &mb->update and hence it doesn't require execution ordering. Hence, alloc_workqueue has been used to replace the deprecated create_singlethread_workqueue instance. The WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure. Since there is a single work item, an explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/ipaq_micro_battery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/ipaq_micro_battery.c b/drivers/power/ipaq_micro_battery.c index 35b01c7..4af7b77 100644 --- a/drivers/power/ipaq_micro_battery.c +++ b/drivers/power/ipaq_micro_battery.c @@ -235,7 +235,7 @@ static int micro_batt_probe(struct platform_device *pdev) return -ENOMEM; mb->micro = dev_get_drvdata(pdev->dev.parent); - mb->wq = create_singlethread_workqueue("ipaq-battery-wq"); + mb->wq = alloc_workqueue("ipaq-battery-wq", WQ_MEM_RECLAIM, 0); if (!mb->wq) return -ENOMEM; -- 2.1.4
Re: [PATCH] sched: Avoid that __wait_on_bit_lock() hangs
On 08/12, Bart Van Assche wrote: > > On 08/12/2016 09:16 AM, Oleg Nesterov wrote: > > Please drop two patches I sent before and try the new one below. > > Hello Oleg, > > Thanks for the patch. In addition to your patch I also applied the > attached two patches And I guess you did this because you think we do not have enough confusion so you decided to add a bit more ;) Could you please test my patch alone without additional changes? > before I started testing. It took some time > before I could reproduce the hang in truncate_inode_pages_range(). All I can say is that this contradicts the previous testing results with my previous patch or with your change in abort_exclusive_wait(). > +int __lock_page_impl(struct page *page, int mode) > +{ > + struct page *page_head = compound_head(page); > + DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked); > + struct task_struct *owner; > + int res; > + > + for (;;) { > + wait.key.timeout = jiffies + 30 * HZ; > + res = __wait_on_bit_lock(page_waitqueue(page_head), > + &wait, bit_wait_io_timeout, mode); > + if (res == 0) { > + set_page_lock_owner(page, current); this is not right, you should use page_head. Although I doubt this can make a difference in this case. The same for get_page_lock_owner() below. > + break; > + } > + if (res == -EINTR) > + break; > + owner = get_page_lock_owner(page); > + pr_info("%s / pid %d / m %#x: %s - continuing to wait for %d\n", > + __func__, task_pid_nr(current), mode, res == -EAGAIN ? > + "timeout" : "interrupted", > + owner ? task_pid_nr(owner) : 0); I thought about a similar debugging patch too. But this is not what we need. Note that if res == -EAGAIN then another exclusive waiter was already woken and it can lock this page and set get_page_lock_owner(). So this can't actually help if the problem is the missed/lost wakeup. Not that it explains the strange dmesg you reported. Perhaps your patch has other bugs, or my patch is buggy, or both. Please do not mix them.
As for "add the timeout" idea it makes sense too and perhaps we will test this later, but we can start with the much more simple patch. Oleg.
Re: [PATCH] sched: Avoid that __wait_on_bit_lock() hangs
On 08/12, Bart Van Assche wrote: > > On 08/12/2016 09:16 AM, Oleg Nesterov wrote: > > Please drop two patches I sent before and try the new one below. > > Hello Oleg, > > Thanks for the patch. In addition to your patch I also applied the > attached two patches And I guess you did this because you think we do not have enough confusion so you decided to add a bit more ;) Could you please test my patch alone without additional changes? > before I started testing. It took some time > before I could reproduce the hang in truncate_inode_pages_range(). All I can say is that this contradicts the previous testing results with my previous patch or with your change in abort_exclusive_wait(). > +int __lock_page_impl(struct page *page, int mode) > +{ > + struct page *page_head = compound_head(page); > + DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked); > + struct task_struct *owner; > + int res; > + > + for (;;) { > + wait.key.timeout = jiffies + 30 * HZ; > + res = __wait_on_bit_lock(page_waitqueue(page_head), > + &wait, bit_wait_io_timeout, mode); > + if (res == 0) { > + set_page_lock_owner(page, current); this is not right, you should use page_head. Although I doubt this can make a difference in this case. The same for get_page_lock_owner() below. > + break; > + } > + if (res == -EINTR) > + break; > + owner = get_page_lock_owner(page); > + pr_info("%s / pid %d / m %#x: %s - continuing to wait for %d\n", > + __func__, task_pid_nr(current), mode, res == -EAGAIN ? > + "timeout" : "interrupted", > + owner ? task_pid_nr(owner) : 0); I thought about a similar debugging patch too. But this is not what we need. Note that if res == -EAGAIN then another exclusive waiter was already woken and it can lock this page and set get_page_lock_owner(). So this can't actually help if the problem is the missed/lost wakeup. Not that it explains the strange dmesg you reported. Perhaps your patch has other bugs, or my patch is buggy, or both. Please do not mix them.
As for "add the timeout" idea it makes sense too and perhaps we will test this later, but we can start with the much more simple patch. Oleg.
[PATCH 5/8] power: ab8500_charger: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "charger_wq" is used for the IRQs and checking HW state of the charger. It has been identity converted. It has multiple work items viz usb_charger_attached_work, kick_wd_work, check_vbat_work, check_hw_failure_work, usb_charger_attached_work, ac_work, ac_charger_attached_work, attach_work and check_usbchgnotok_work, which require execution ordering. Hence, a dedicated ordered workqueue has been used here. The WQ_MEM_RECLAIM flag has also been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar--- drivers/power/ab8500_charger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c index 30de5d4..5cee9aa 100644 --- a/drivers/power/ab8500_charger.c +++ b/drivers/power/ab8500_charger.c @@ -3540,8 +3540,8 @@ static int ab8500_charger_probe(struct platform_device *pdev) di->usb_state.usb_current = -1; /* Create a work queue for the charger */ - di->charger_wq = - create_singlethread_workqueue("ab8500_charger_wq"); + di->charger_wq = alloc_ordered_workqueue("ab8500_charger_wq", +WQ_MEM_RECLAIM); if (di->charger_wq == NULL) { dev_err(di->dev, "failed to create work queue\n"); return -ENOMEM; -- 2.1.4
[PATCH 5/8] power: ab8500_charger: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "charger_wq" is used for the IRQs and checking HW state of the charger. It has been identity converted. It has multiple work items viz usb_charger_attached_work, kick_wd_work, check_vbat_work, check_hw_failure_work, usb_charger_attached_work, ac_work, ac_charger_attached_work, attach_work and check_usbchgnotok_work, which require execution ordering. Hence, a dedicated ordered workqueue has been used here. The WQ_MEM_RECLAIM flag has also been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/ab8500_charger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c index 30de5d4..5cee9aa 100644 --- a/drivers/power/ab8500_charger.c +++ b/drivers/power/ab8500_charger.c @@ -3540,8 +3540,8 @@ static int ab8500_charger_probe(struct platform_device *pdev) di->usb_state.usb_current = -1; /* Create a work queue for the charger */ - di->charger_wq = - create_singlethread_workqueue("ab8500_charger_wq"); + di->charger_wq = alloc_ordered_workqueue("ab8500_charger_wq", +WQ_MEM_RECLAIM); if (di->charger_wq == NULL) { dev_err(di->dev, "failed to create work queue\n"); return -ENOMEM; -- 2.1.4
[PATCH 4/8] power: intel_mid_battery: Remove deprecated create_singlethread_workqueue
The workqueue "monitor_wqueue" is used to monitor the PMIC battery status. It queues a single work item (pbi->monitor_battery) and hence doesn't require ordering. Hence, alloc_workqueue has been used to replace the deprecated create_singlethread_workqueue instance. Since PMIC battery status needs to be monitored for any change, the WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure. Since there is a single work item, an explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/intel_mid_battery.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/power/intel_mid_battery.c b/drivers/power/intel_mid_battery.c index 9fa4acc..dc7feef 100644 --- a/drivers/power/intel_mid_battery.c +++ b/drivers/power/intel_mid_battery.c @@ -689,8 +689,7 @@ static int probe(int irq, struct device *dev) /* initialize all required framework before enabling interrupts */ INIT_WORK(&pbi->handler, pmic_battery_handle_intrpt); INIT_DELAYED_WORK(&pbi->monitor_battery, pmic_battery_monitor); - pbi->monitor_wqueue = - create_singlethread_workqueue(dev_name(dev)); + pbi->monitor_wqueue = alloc_workqueue(dev_name(dev), WQ_MEM_RECLAIM, 0); if (!pbi->monitor_wqueue) { dev_err(dev, "%s(): wqueue init failed\n", __func__); retval = -ESRCH; -- 2.1.4
[PATCH 4/8] power: intel_mid_battery: Remove deprecated create_singlethread_workqueue
The workqueue "monitor_wqueue" is used to monitor the PMIC battery status. It queues a single work item (pbi->monitor_battery) and hence doesn't require ordering. Hence, alloc_workqueue has been used to replace the deprecated create_singlethread_workqueue instance. Since PMIC battery status needs to be monitored for any change, the WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure. Since there is a single work item, an explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/intel_mid_battery.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/power/intel_mid_battery.c b/drivers/power/intel_mid_battery.c index 9fa4acc..dc7feef 100644 --- a/drivers/power/intel_mid_battery.c +++ b/drivers/power/intel_mid_battery.c @@ -689,8 +689,7 @@ static int probe(int irq, struct device *dev) /* initialize all required framework before enabling interrupts */ INIT_WORK(&pbi->handler, pmic_battery_handle_intrpt); INIT_DELAYED_WORK(&pbi->monitor_battery, pmic_battery_monitor); - pbi->monitor_wqueue = - create_singlethread_workqueue(dev_name(dev)); + pbi->monitor_wqueue = alloc_workqueue(dev_name(dev), WQ_MEM_RECLAIM, 0); if (!pbi->monitor_wqueue) { dev_err(dev, "%s(): wqueue init failed\n", __func__); retval = -ESRCH; -- 2.1.4
[PATCH 3/8] power: pm2301_charger: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "charger_wq" is used for running all the charger related tasks. This involves charger detection, checking for HW failure and HW status. This workqueue has been identity converted. It queues multiple workitems viz &pm2->check_main_thermal_prot_work, &pm2->check_hw_failure_work, &pm2->ac_work. Hence, the deprecated create_singlethread_workqueue() instance has been replaced with a dedicated ordered workqueue. The WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/pm2301_charger.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c index fb62ed3..78561b6 100644 --- a/drivers/power/pm2301_charger.c +++ b/drivers/power/pm2301_charger.c @@ -1054,7 +1054,8 @@ static int pm2xxx_wall_charger_probe(struct i2c_client *i2c_client, pm2->ac_chg.external = true; /* Create a work queue for the charger */ - pm2->charger_wq = create_singlethread_workqueue("pm2xxx_charger_wq"); + pm2->charger_wq = alloc_ordered_workqueue("pm2xxx_charger_wq", + WQ_MEM_RECLAIM); if (pm2->charger_wq == NULL) { ret = -ENOMEM; dev_err(pm2->dev, "failed to create work queue\n"); -- 2.1.4
[PATCH 3/8] power: pm2301_charger: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "charger_wq" is used for running all the charger related tasks. This involves charger detection, checking for HW failure and HW status. This workqueue has been identity converted. It queues multiple workitems viz &pm2->check_main_thermal_prot_work, &pm2->check_hw_failure_work, &pm2->ac_work. Hence, the deprecated create_singlethread_workqueue() instance has been replaced with a dedicated ordered workqueue. The WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/pm2301_charger.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c index fb62ed3..78561b6 100644 --- a/drivers/power/pm2301_charger.c +++ b/drivers/power/pm2301_charger.c @@ -1054,7 +1054,8 @@ static int pm2xxx_wall_charger_probe(struct i2c_client *i2c_client, pm2->ac_chg.external = true; /* Create a work queue for the charger */ - pm2->charger_wq = create_singlethread_workqueue("pm2xxx_charger_wq"); + pm2->charger_wq = alloc_ordered_workqueue("pm2xxx_charger_wq", + WQ_MEM_RECLAIM); if (pm2->charger_wq == NULL) { ret = -ENOMEM; dev_err(pm2->dev, "failed to create work queue\n"); -- 2.1.4
[PATCH 2/8] power: ab8500_btemp: Remove deprecated create_singlethread_workqueue
The workqueue "btemp_wq" is used for measuring the temperature periodically. It queues a single workitem (btemp_periodic_work) and hence doesn't require ordering. Thus, the deprecated create_singlethread_workqueue() instance has been replaced with alloc_workqueue(). The WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure. Since there is a single work item, explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/ab8500_btemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c index bf2e5dd..6ffdc18 100644 --- a/drivers/power/ab8500_btemp.c +++ b/drivers/power/ab8500_btemp.c @@ -1095,7 +1095,7 @@ static int ab8500_btemp_probe(struct platform_device *pdev) /* Create a work queue for the btemp */ di->btemp_wq = - create_singlethread_workqueue("ab8500_btemp_wq"); + alloc_workqueue("ab8500_btemp_wq", WQ_MEM_RECLAIM, 0); if (di->btemp_wq == NULL) { dev_err(di->dev, "failed to create work queue\n"); return -ENOMEM; -- 2.1.4
[PATCH 2/8] power: ab8500_btemp: Remove deprecated create_singlethread_workqueue
The workqueue "btemp_wq" is used for measuring the temperature periodically. It queues a single workitem (btemp_periodic_work) and hence doesn't require ordering. Thus, the deprecated create_singlethread_workqueue() instance has been replaced with alloc_workqueue(). The WQ_MEM_RECLAIM flag has been set to ensure forward progress under memory pressure. Since there is a single work item, explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/ab8500_btemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c index bf2e5dd..6ffdc18 100644 --- a/drivers/power/ab8500_btemp.c +++ b/drivers/power/ab8500_btemp.c @@ -1095,7 +1095,7 @@ static int ab8500_btemp_probe(struct platform_device *pdev) /* Create a work queue for the btemp */ di->btemp_wq = - create_singlethread_workqueue("ab8500_btemp_wq"); + alloc_workqueue("ab8500_btemp_wq", WQ_MEM_RECLAIM, 0); if (di->btemp_wq == NULL) { dev_err(di->dev, "failed to create work queue\n"); return -ENOMEM; -- 2.1.4
[PATCH 1/8] power: abx500_chargalg: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "chargalg_wq" is used for running the charging algorithm. It has multiple workitems viz &di->chargalg_periodic_work, &di->chargalg_wd_work, &di->chargalg_work per abx500_chargalg, which require ordering. It has been identity converted. Also, WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/abx500_chargalg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c index d9104b1..a4411d6 100644 --- a/drivers/power/abx500_chargalg.c +++ b/drivers/power/abx500_chargalg.c @@ -2091,8 +2091,8 @@ static int abx500_chargalg_probe(struct platform_device *pdev) abx500_chargalg_maintenance_timer_expired; /* Create a work queue for the chargalg */ - di->chargalg_wq = - create_singlethread_workqueue("abx500_chargalg_wq"); + di->chargalg_wq = alloc_ordered_workqueue("abx500_chargalg_wq", + WQ_MEM_RECLAIM); if (di->chargalg_wq == NULL) { dev_err(di->dev, "failed to create work queue\n"); return -ENOMEM; -- 2.1.4
[PATCH 1/8] power: abx500_chargalg: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue() with WQ_MEM_RECLAIM set replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "chargalg_wq" is used for running the charging algorithm. It has multiple workitems viz &di->chargalg_periodic_work, &di->chargalg_wd_work, &di->chargalg_work per abx500_chargalg, which require ordering. It has been identity converted. Also, WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar --- drivers/power/abx500_chargalg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c index d9104b1..a4411d6 100644 --- a/drivers/power/abx500_chargalg.c +++ b/drivers/power/abx500_chargalg.c @@ -2091,8 +2091,8 @@ static int abx500_chargalg_probe(struct platform_device *pdev) abx500_chargalg_maintenance_timer_expired; /* Create a work queue for the chargalg */ - di->chargalg_wq = - create_singlethread_workqueue("abx500_chargalg_wq"); + di->chargalg_wq = alloc_ordered_workqueue("abx500_chargalg_wq", + WQ_MEM_RECLAIM); if (di->chargalg_wq == NULL) { dev_err(di->dev, "failed to create work queue\n"); return -ENOMEM; -- 2.1.4
[PATCH 0/8] Remove deprecated workqueue interface users
This patch set removes the instances of deprecated create_singlethread_workqueues in drivers/power by making the appropriate conversions. Bhaktipriya Shridhar (8): power: abx500_chargalg: Remove deprecated create_singlethread_workqueue power: ab8500_btemp: Remove deprecated create_singlethread_workqueue power: pm2301_charger: Remove deprecated create_singlethread_workqueue power: intel_mid_battery: Remove deprecated create_singlethread_workqueue power: ab8500_charger: Remove deprecated create_singlethread_workqueue power: ipaq_micro_battery: Remove deprecated create_singlethread_workqueue power: ab8500_fg: Remove deprecated create_singlethread_workqueue power: ds2760_battery: Remove deprecated create_singlethread_workqueue drivers/power/ab8500_btemp.c | 2 +- drivers/power/ab8500_charger.c | 4 ++-- drivers/power/ab8500_fg.c | 2 +- drivers/power/abx500_chargalg.c| 4 ++-- drivers/power/ds2760_battery.c | 3 ++- drivers/power/intel_mid_battery.c | 3 +-- drivers/power/ipaq_micro_battery.c | 2 +- drivers/power/pm2301_charger.c | 3 ++- 8 files changed, 12 insertions(+), 11 deletions(-) -- 2.1.4
[PATCH 0/8] Remove deprecated workqueue interface users
This patch set removes the instances of deprecated create_singlethread_workqueues in drivers/power by making the appropriate conversions. Bhaktipriya Shridhar (8): power: abx500_chargalg: Remove deprecated create_singlethread_workqueue power: ab8500_btemp: Remove deprecated create_singlethread_workqueue power: pm2301_charger: Remove deprecated create_singlethread_workqueue power: intel_mid_battery: Remove deprecated create_singlethread_workqueue power: ab8500_charger: Remove deprecated create_singlethread_workqueue power: ipaq_micro_battery: Remove deprecated create_singlethread_workqueue power: ab8500_fg: Remove deprecated create_singlethread_workqueue power: ds2760_battery: Remove deprecated create_singlethread_workqueue drivers/power/ab8500_btemp.c | 2 +- drivers/power/ab8500_charger.c | 4 ++-- drivers/power/ab8500_fg.c | 2 +- drivers/power/abx500_chargalg.c| 4 ++-- drivers/power/ds2760_battery.c | 3 ++- drivers/power/intel_mid_battery.c | 3 +-- drivers/power/ipaq_micro_battery.c | 2 +- drivers/power/pm2301_charger.c | 3 ++- 8 files changed, 12 insertions(+), 11 deletions(-) -- 2.1.4
Re: [RFC PATCH 0/3] Documentation: switch to pdflatex and fix pdf build
Am 13.08.2016 um 00:40 schrieb Jonathan Corbet: > On Wed, 10 Aug 2016 18:54:06 +0300 > Jani Nikula wrote: > >> With these you should be able to get started with pdf generation. It's a >> quick transition to pdflatex, the patches are not very pretty, but the >> pdf output is. Patch 3/3 works as an example where to add your stuff >> (latex_documents in conf.py) and how. > > OK, now I have a bone to pick with you. > > I applied this, then decided to install the needed toolchain on the > Tumbleweed system I've been playing with; it wanted to install 1,727 > packages to get pdflatex. Pandoc just doesn't seem so bad anymore. I am completely disenchanted on this topic. My experience is: 1) You won't get any reasonable typesetting engine which preserves your disk space. I don't know how many files or packages are installed, the only thing I know is, a TeX installation is always >1GB. 2) You won't get a (pdf, ps,..) book with a perfect layout without any handcraft or at least a *theming*. TeX has many options to influence the layout and Sphinx provides its own LaTeX-document class (sphinxmanual) which is IMHO awful. > So I switched to the Fedora system, and found myself in a twisty maze of > missing font files, missing style files, missing babel crap, etc., each > doled out to me one file per run. But I did eventually get PDFs out of > it. On debian it should be enough to install *base* and *recommended* sudo apt-get install texlive-base texlive-latex-recommended > The output isn't great; among other things, it seems to be about 1/2 blank > pages. 1/2 ? .. I have only empty pages at the start of parts or chapters, which is a typical layout setting. > But it's something. This is the sphinxmanual document class. > I've applied this so we have something to play with, but it doesn't feel > like a great solution. This is the sort of installation hell that we > wanted to get away from. See above, on debian it should be enough to install the two meta packages. 
> It makes me wonder how hard it can really be to > fix rst2pdf; I wish I could say I'll find some time to figure that out. > Sigh. I gave it a try, but as I came closer to the sources I realized that it is hair-raising. I looked at the issues, added a comment to a related issue, a few days later the issue was closed without any comment or code change. https://github.com/rst2pdf/rst2pdf/issues/556#issuecomment-228779542 My advice, if you don't like to waste your time: forget it. Some thoughts of mine, written in an earlier mail: > The sphinx-doc built-in LaTeX builder > > * http://www.sphinx-doc.org/en/stable/config.html#options-for-latex-output > > has some drawbacks, e.g. it produces LaTeX for the pdfTeX engine. > LaTeX is by default ASCII and it needs some "inputenc" to support a wider > range of characters. This is not very helpful if you have a toolchain > in an international community. > > The alternative to LaTeX is to use the XeTeX engine, which supports UTF-8 > encoded input by default and supports TrueType/OpenType fonts directly. > That's why I started to write a XeLaTeX builder ... > > * > https://github.com/return42/sphkerneldoc/blob/master/scripts/site-python/xelatex_ext/__init__.py#L15 > > > ... but I can't predict when this will be finished ... > > Whichever tool is used, my experience is that building > PDF (books) with a minimum of quality is not simple. > Layout with tables, splitting table content over pages, switching > from landscape to portrait and vice versa, the flow of objects etc. > .. all this will need some manual intervention. -- Markus --
Re: [RFC PATCH 0/3] Documentation: switch to pdflatex and fix pdf build
Am 13.08.2016 um 00:40 schrieb Jonathan Corbet : > On Wed, 10 Aug 2016 18:54:06 +0300 > Jani Nikula wrote: > >> With these you should be able to get started with pdf generation. It's a >> quick transition to pdflatex, the patches are not very pretty, but the >> pdf output is. Patch 3/3 works as an example where to add your stuff >> (latex_documents in conf.py) and how. > > OK, now I have a bone to pick with you. > > I applied this, then decided to install the needed toolchain on the > Tumbleweed system I've been playing with; it wanted to install 1,727 > packages to get pdflatex. Pandoc just doesn't seem so bad anymore. I am completely disenchanted on this topic. My experience is: 1) You won't get any reasonable typesetting engine which preserves your disk space. I don't know how many files or packages are installed, the only thing I know is, a TeX installation is always >1GB. 2) You won't get a (pdf, ps,..) book with a perfect layout without any handcraft or at least a *theming*. TeX has many options to influence the layout and Sphinx provides its own LaTeX-document class (sphinxmanual) which is IMHO awful. > So I switched to the Fedora system, and found myself in a twisty maze of > missing font files, missing style files, missing babel crap, etc., each > doled out to me one file per run. But I did eventually get PDFs out of > it. On debian it should be enough to install *base* and *recommended* sudo apt-get install texlive-base texlive-latex-recommended > The output isn't great; among other things, it seems to be about 1/2 blank > pages. 1/2 ? .. I have only empty pages at the start of parts or chapters, which is a typical layout setting. > But it's something. This is the sphinxmanual document class. > I've applied this so we have something to play with, but it doesn't feel > like a great solution. This is the sort of installation hell that we > wanted to get away from. See above, on debian it should be enough to install the two meta packages. 
> It makes me wonder how hard it can really be to > fix rst2pdf; I wish I could say I'll find some time to figure that out. > Sigh. I gave it a try, but as I came closer to the sources I realized that it is hair-raising. I looked at the issues, added a comment to a related issue, a few days later the issue was closed without any comment or code change. https://github.com/rst2pdf/rst2pdf/issues/556#issuecomment-228779542 My advice, if you don't like to waste your time: forget it. Some thoughts of mine, written in an earlier mail: > The sphinx-doc built-in LaTeX builder > > * http://www.sphinx-doc.org/en/stable/config.html#options-for-latex-output > > has some drawbacks, e.g. it produces LaTeX for the pdfTeX engine. > LaTeX is by default ASCII and it needs some "inputenc" to support a wider > range of characters. This is not very helpful if you have a toolchain > in an international community. > > The alternative to LaTeX is to use the XeTeX engine, which supports UTF-8 > encoded input by default and supports TrueType/OpenType fonts directly. > That's why I started to write a XeLaTeX builder ... > > * > https://github.com/return42/sphkerneldoc/blob/master/scripts/site-python/xelatex_ext/__init__.py#L15 > > > ... but I can't predict when this will be finished ... > > Whichever tool is used, my experience is that building > PDF (books) with a minimum of quality is not simple. > Layout with tables, splitting table content over pages, switching > from landscape to portrait and vice versa, the flow of objects etc. > .. all this will need some manual intervention. -- Markus --
RE: [RFC][PATCH 7/7] cpufreq: intel_pstate: Change P-state selection algorithm for Core
On 2016.08.05 17:02 Rafael J. Wysocki wrote: >> On 2016.08.03 21:19 Doug Smythies wrote: >>> On 2016.07.31 16:49 Rafael J. Wysocki wrote: >>> >>> The PID-base P-state selection algorithm used by intel_pstate for >>> Core processors is based on very weak foundations. >> >> ...[cut]... >> >>> +static inline int32_t get_target_pstate_default(struct cpudata *cpu) >>> +{ >>> + struct sample *sample = >sample; >>> + int32_t busy_frac; >>> + int pstate; >>> + >>> + busy_frac = div_fp(sample->mperf, sample->tsc); >>> + sample->busy_scaled = busy_frac * 100; >>> + >>> + if (busy_frac < cpu->iowait_boost) >>> + busy_frac = cpu->iowait_boost; >>> + >>> + cpu->iowait_boost >>= 1; >>> + >>> + pstate = cpu->pstate.turbo_pstate; >>> + return fp_toint((pstate + (pstate >> 2)) * busy_frac); >>> +} >>> + >> My previous replies (and see below) have suggested that some filtering is needed on the target pstate, otherwise, and dependant on the type of workload, it tends to oscillate. I added the IIR (Infinite Impulse Response) filter that I have suggested in the past: diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index c43ef55..262ec5f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -98,6 +98,7 @@ static inline u64 div_ext_fp(u64 x, u64 y) * @tsc: Difference of time stamp counter between last and * current sample * @time: Current time from scheduler + * @target:target pstate filtered. * * This structure is used in the cpudata structure to store performance sample * data for choosing next P State. 
@@ -108,6 +109,7 @@ struct sample { u64 aperf; u64 mperf; u64 tsc; + u64 target; u64 time; }; @@ -1168,6 +1170,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) pstate_funcs.get_vid(cpu); intel_pstate_set_min_pstate(cpu); + cpu->sample.target = int_tofp(cpu->pstate.min_pstate); } static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu) @@ -1301,8 +1304,10 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) static inline int32_t get_target_pstate_default(struct cpudata *cpu) { struct sample *sample = >sample; + int64_t scaled_gain, unfiltered_target; int32_t busy_frac; int pstate; + u64 duration_ns; busy_frac = div_fp(sample->mperf, sample->tsc); sample->busy_scaled = busy_frac * 100; @@ -1313,7 +1318,74 @@ static inline int32_t get_target_pstate_default(struct cpudata *cpu) cpu->iowait_boost >>= 1; pstate = cpu->pstate.turbo_pstate; - return fp_toint((pstate + (pstate >> 2)) * busy_frac); + /* To Do: I think the above should be: +* +* if (limits.no_turbo || limits.turbo_disabled) +* pstate = cpu->pstate.max_pstate; +* else +* pstate = cpu->pstate.turbo_pstate; +* +* figure it out. +* +* no clamps. Pre-filter clamping was needed in past implementations. +* To Do: Is any pre-filter clamping needed here? */ + + unfiltered_target = (pstate + (pstate >> 2)) * busy_frac; + + /* +* Idle check. +* We have a deferrable timer. Very long durations can be +* either due to long idle (C0 time near 0), +* or due to short idle times that spanned jiffy boundaries +* (C0 time not near zero). +* +* To Do: As of the utilization stuff, I do not think the +* spanning jiffy boundaries thing is true anymore. +* Check, and fix the comment. +* +* The very long durations are 0.4 seconds or more. +* Either way, a very long duration will effectively flush +* the IIR filter, otherwise falling edge load response times +* can be on the order of tens of seconds, because this driver +* runs very rarely. 
Furthermore, for higher periodic loads that +* just so happen to not be in the C0 state on jiffy boundaries, +* the long ago history should be forgotten. +* For cases of durations that are a few times the set sample +* period, increase the IIR filter gain so as to weight +* the current sample more appropriately. +* +* To Do: sample_time should be forced to be accurate. For +* example if the kernel is a 250 Hz kernel, then a +* sample_rate_ms of 10 should result in a sample_time of 12. +* +* To Do: Check that the IO Boost case is not filtered too much. +*It might be that a filter by-pass is needed for the boost case. +*However, the existing gain = f(duration) might be good enough. +*/ + + duration_ns = cpu->sample.time - cpu->last_sample_time; + + scaled_gain = div_u64(int_tofp(duration_ns) * +
RE: [RFC][PATCH 7/7] cpufreq: intel_pstate: Change P-state selection algorithm for Core
On 2016.08.05 17:02 Rafael J. Wysocki wrote: >> On 2016.08.03 21:19 Doug Smythies wrote: >>> On 2016.07.31 16:49 Rafael J. Wysocki wrote: >>> >>> The PID-base P-state selection algorithm used by intel_pstate for >>> Core processors is based on very weak foundations. >> >> ...[cut]... >> >>> +static inline int32_t get_target_pstate_default(struct cpudata *cpu) >>> +{ >>> + struct sample *sample = >sample; >>> + int32_t busy_frac; >>> + int pstate; >>> + >>> + busy_frac = div_fp(sample->mperf, sample->tsc); >>> + sample->busy_scaled = busy_frac * 100; >>> + >>> + if (busy_frac < cpu->iowait_boost) >>> + busy_frac = cpu->iowait_boost; >>> + >>> + cpu->iowait_boost >>= 1; >>> + >>> + pstate = cpu->pstate.turbo_pstate; >>> + return fp_toint((pstate + (pstate >> 2)) * busy_frac); >>> +} >>> + >> My previous replies (and see below) have suggested that some filtering is needed on the target pstate, otherwise, and dependant on the type of workload, it tends to oscillate. I added the IIR (Infinite Impulse Response) filter that I have suggested in the past: diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index c43ef55..262ec5f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -98,6 +98,7 @@ static inline u64 div_ext_fp(u64 x, u64 y) * @tsc: Difference of time stamp counter between last and * current sample * @time: Current time from scheduler + * @target:target pstate filtered. * * This structure is used in the cpudata structure to store performance sample * data for choosing next P State. 
@@ -108,6 +109,7 @@ struct sample { u64 aperf; u64 mperf; u64 tsc; + u64 target; u64 time; }; @@ -1168,6 +1170,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) pstate_funcs.get_vid(cpu); intel_pstate_set_min_pstate(cpu); + cpu->sample.target = int_tofp(cpu->pstate.min_pstate); } static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu) @@ -1301,8 +1304,10 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) static inline int32_t get_target_pstate_default(struct cpudata *cpu) { struct sample *sample = >sample; + int64_t scaled_gain, unfiltered_target; int32_t busy_frac; int pstate; + u64 duration_ns; busy_frac = div_fp(sample->mperf, sample->tsc); sample->busy_scaled = busy_frac * 100; @@ -1313,7 +1318,74 @@ static inline int32_t get_target_pstate_default(struct cpudata *cpu) cpu->iowait_boost >>= 1; pstate = cpu->pstate.turbo_pstate; - return fp_toint((pstate + (pstate >> 2)) * busy_frac); + /* To Do: I think the above should be: +* +* if (limits.no_turbo || limits.turbo_disabled) +* pstate = cpu->pstate.max_pstate; +* else +* pstate = cpu->pstate.turbo_pstate; +* +* figure it out. +* +* no clamps. Pre-filter clamping was needed in past implementations. +* To Do: Is any pre-filter clamping needed here? */ + + unfiltered_target = (pstate + (pstate >> 2)) * busy_frac; + + /* +* Idle check. +* We have a deferrable timer. Very long durations can be +* either due to long idle (C0 time near 0), +* or due to short idle times that spanned jiffy boundaries +* (C0 time not near zero). +* +* To Do: As of the utilization stuff, I do not think the +* spanning jiffy boundaries thing is true anymore. +* Check, and fix the comment. +* +* The very long durations are 0.4 seconds or more. +* Either way, a very long duration will effectively flush +* the IIR filter, otherwise falling edge load response times +* can be on the order of tens of seconds, because this driver +* runs very rarely. 
Furthermore, for higher periodic loads that +* just so happen to not be in the C0 state on jiffy boundaries, +* the long ago history should be forgotten. +* For cases of durations that are a few times the set sample +* period, increase the IIR filter gain so as to weight +* the current sample more appropriately. +* +* To Do: sample_time should be forced to be accurate. For +* example if the kernel is a 250 Hz kernel, then a +* sample_rate_ms of 10 should result in a sample_time of 12. +* +* To Do: Check that the IO Boost case is not filtered too much. +*It might be that a filter by-pass is needed for the boost case. +*However, the existing gain = f(duration) might be good enough. +*/ + + duration_ns = cpu->sample.time - cpu->last_sample_time; + + scaled_gain = div_u64(int_tofp(duration_ns) * +
[PATCH] whci: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue replaces the deprecated create_singlethread_workqueue. The workqueue "workqueue" has multiple workitems which may require ordering. Hence, a dedicated ordered workqueue has been used. Since the workqueue is not being used on a memory reclaim path, WQ_MEM_RECLAIM has not been set. Signed-off-by: Bhaktipriya Shridhar --- drivers/usb/host/whci/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/whci/init.c b/drivers/usb/host/whci/init.c index e363723..ad8eb57 100644 --- a/drivers/usb/host/whci/init.c +++ b/drivers/usb/host/whci/init.c @@ -65,7 +65,7 @@ int whc_init(struct whc *whc) init_waitqueue_head(&whc->cmd_wq); init_waitqueue_head(&whc->async_list_wq); init_waitqueue_head(&whc->periodic_list_wq); - whc->workqueue = create_singlethread_workqueue(dev_name(&whc->umc->dev)); + whc->workqueue = alloc_ordered_workqueue(dev_name(&whc->umc->dev), 0); if (whc->workqueue == NULL) { ret = -ENOMEM; goto error; -- 2.1.4
[PATCH] whci: Remove deprecated create_singlethread_workqueue
alloc_ordered_workqueue replaces the deprecated create_singlethread_workqueue. The workqueue "workqueue" has multiple workitems which may require ordering. Hence, a dedicated ordered workqueue has been used. Since the workqueue is not being used on a memory reclaim path, WQ_MEM_RECLAIM has not been set. Signed-off-by: Bhaktipriya Shridhar --- drivers/usb/host/whci/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/whci/init.c b/drivers/usb/host/whci/init.c index e363723..ad8eb57 100644 --- a/drivers/usb/host/whci/init.c +++ b/drivers/usb/host/whci/init.c @@ -65,7 +65,7 @@ int whc_init(struct whc *whc) init_waitqueue_head(&whc->cmd_wq); init_waitqueue_head(&whc->async_list_wq); init_waitqueue_head(&whc->periodic_list_wq); - whc->workqueue = create_singlethread_workqueue(dev_name(&whc->umc->dev)); + whc->workqueue = alloc_ordered_workqueue(dev_name(&whc->umc->dev), 0); if (whc->workqueue == NULL) { ret = -ENOMEM; goto error; -- 2.1.4
Re: clocksource_watchdog causing scheduling of timers every second (was [v13] support "task_isolation" mode)
On Fri, Aug 12, 2016 at 09:19:19AM -0700, Paul E. McKenney wrote: > On Fri, Aug 12, 2016 at 04:26:13PM +0200, Frederic Weisbecker wrote: > > On Fri, Aug 12, 2016 at 09:23:13AM -0500, Christoph Lameter wrote: > > > On Thu, 11 Aug 2016, Paul E. McKenney wrote: > > > > > > > Heh! The only really good idea is for clocks to be reliably in sync. > > > > > > > > But if they go out of sync, what do you want to do instead? > > > > > > For a NOHZ task? Write a message to the syslog and reenable tick. > > Fair enough! Kicking off a low-priority task would achieve the latter > but not necessarily the former. And of course assumes that the worker > thread is at real-time priority with various scheduler anti-starvation > features disabled. > > > Indeed, a strong clocksource is a requirement for a full tickless machine. > > No disagreement here! ;-) I have a bot in my mind that randomly posts obvious statements about nohz_full here now and then :-)
Re: clocksource_watchdog causing scheduling of timers every second (was [v13] support "task_isolation" mode)
On Fri, Aug 12, 2016 at 09:19:19AM -0700, Paul E. McKenney wrote: > On Fri, Aug 12, 2016 at 04:26:13PM +0200, Frederic Weisbecker wrote: > > On Fri, Aug 12, 2016 at 09:23:13AM -0500, Christoph Lameter wrote: > > > On Thu, 11 Aug 2016, Paul E. McKenney wrote: > > > > > > > Heh! The only really good idea is for clocks to be reliably in sync. > > > > > > > > But if they go out of sync, what do you want to do instead? > > > > > > For a NOHZ task? Write a message to the syslog and reenable tick. > > Fair enough! Kicking off a low-priority task would achieve the latter > but not necessarily the former. And of course assumes that the worker > thread is at real-time priority with various scheduler anti-starvation > features disabled. > > > Indeed, a strong clocksource is a requirement for a full tickless machine. > > No disagreement here! ;-) I have a bot in my mind that randomly posts obvious statements about nohz_full here now and then :-)
[PATCH v2] cfag12864b: Remove deprecated create_singlethread_workqueue
The workqueue has a single workitem (&cfag12864b_work) and hence doesn't require ordering. Also, it is not being used on a memory reclaim path. Hence, the singlethreaded workqueue has been replaced with the use of system_wq. System workqueues have been able to handle high level of concurrency for a long time now and hence it's not required to have a singlethreaded workqueue just to gain concurrency. Unlike a dedicated per-cpu workqueue created with create_singlethread_workqueue(), system_wq allows multiple work items to overlap executions even on the same CPU; however, a per-cpu workqueue doesn't have any CPU locality or global ordering guarantee unless the target CPU is explicitly specified and thus the increase of local concurrency shouldn't make any difference. Work item has been sync cancelled in cfag12864b_disable() to ensure that there are no pending tasks while disconnecting the driver. Signed-off-by: Bhaktipriya Shridhar --- Changes in v2: - Used cancel_delayed_work_sync instead of cancel_delayed_work to ensure that the work item is finished. 
drivers/auxdisplay/cfag12864b.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/auxdisplay/cfag12864b.c b/drivers/auxdisplay/cfag12864b.c index 41ce4bd..551b902 100644 --- a/drivers/auxdisplay/cfag12864b.c +++ b/drivers/auxdisplay/cfag12864b.c @@ -223,12 +223,11 @@ static unsigned char *cfag12864b_cache; static DEFINE_MUTEX(cfag12864b_mutex); static unsigned char cfag12864b_updating; static void cfag12864b_update(struct work_struct *delayed_work); -static struct workqueue_struct *cfag12864b_workqueue; static DECLARE_DELAYED_WORK(cfag12864b_work, cfag12864b_update); static void cfag12864b_queue(void) { - queue_delayed_work(cfag12864b_workqueue, &cfag12864b_work, + schedule_delayed_work(&cfag12864b_work, HZ / cfag12864b_rate); } @@ -256,8 +255,7 @@ void cfag12864b_disable(void) if (cfag12864b_updating) { cfag12864b_updating = 0; - cancel_delayed_work(&cfag12864b_work); - flush_workqueue(cfag12864b_workqueue); + cancel_delayed_work_sync(&cfag12864b_work); } mutex_unlock(&cfag12864b_mutex); @@ -357,19 +355,12 @@ static int __init cfag12864b_init(void) goto bufferalloced; } - cfag12864b_workqueue = create_singlethread_workqueue(CFAG12864B_NAME); - if (cfag12864b_workqueue == NULL) - goto cachealloced; - cfag12864b_clear(); cfag12864b_on(); cfag12864b_inited = 1; return 0; -cachealloced: - kfree(cfag12864b_cache); - bufferalloced: free_page((unsigned long) cfag12864b_buffer); @@ -381,7 +372,6 @@ static void __exit cfag12864b_exit(void) { cfag12864b_disable(); cfag12864b_off(); - destroy_workqueue(cfag12864b_workqueue); kfree(cfag12864b_cache); free_page((unsigned long) cfag12864b_buffer); } -- 2.1.4
[PATCH v2] cfag12864b: Remove deprecated create_singlethread_workqueue
The workqueue has a single work item (&cfag12864b_work) and hence doesn't require ordering. Also, it is not being used on a memory reclaim path. Hence, the singlethreaded workqueue has been replaced with the use of system_wq. System workqueues have been able to handle a high level of concurrency for a long time now and hence it's not required to have a singlethreaded workqueue just to gain concurrency. Unlike a dedicated per-cpu workqueue created with create_singlethread_workqueue(), system_wq allows multiple work items to overlap executions even on the same CPU; however, a per-cpu workqueue doesn't have any CPU locality or global ordering guarantee unless the target CPU is explicitly specified and thus the increase of local concurrency shouldn't make any difference. The work item has been sync cancelled in cfag12864b_disable() to ensure that there are no pending tasks while disconnecting the driver. Signed-off-by: Bhaktipriya Shridhar --- Changes in v2: - Used cancel_delayed_work_sync instead of cancel_delayed_work to ensure that the work item is finished. 
drivers/auxdisplay/cfag12864b.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/auxdisplay/cfag12864b.c b/drivers/auxdisplay/cfag12864b.c index 41ce4bd..551b902 100644 --- a/drivers/auxdisplay/cfag12864b.c +++ b/drivers/auxdisplay/cfag12864b.c @@ -223,12 +223,11 @@ static unsigned char *cfag12864b_cache; static DEFINE_MUTEX(cfag12864b_mutex); static unsigned char cfag12864b_updating; static void cfag12864b_update(struct work_struct *delayed_work); -static struct workqueue_struct *cfag12864b_workqueue; static DECLARE_DELAYED_WORK(cfag12864b_work, cfag12864b_update); static void cfag12864b_queue(void) { - queue_delayed_work(cfag12864b_workqueue, &cfag12864b_work, + schedule_delayed_work(&cfag12864b_work, HZ / cfag12864b_rate); } @@ -256,8 +255,7 @@ void cfag12864b_disable(void) if (cfag12864b_updating) { cfag12864b_updating = 0; - cancel_delayed_work(&cfag12864b_work); - flush_workqueue(cfag12864b_workqueue); + cancel_delayed_work_sync(&cfag12864b_work); } mutex_unlock(&cfag12864b_mutex); @@ -357,19 +355,12 @@ static int __init cfag12864b_init(void) goto bufferalloced; } - cfag12864b_workqueue = create_singlethread_workqueue(CFAG12864B_NAME); - if (cfag12864b_workqueue == NULL) - goto cachealloced; - cfag12864b_clear(); cfag12864b_on(); cfag12864b_inited = 1; return 0; -cachealloced: - kfree(cfag12864b_cache); - bufferalloced: free_page((unsigned long) cfag12864b_buffer); @@ -381,7 +372,6 @@ static void __exit cfag12864b_exit(void) { cfag12864b_disable(); cfag12864b_off(); - destroy_workqueue(cfag12864b_workqueue); kfree(cfag12864b_cache); free_page((unsigned long) cfag12864b_buffer); } -- 2.1.4