Re: [PATCH v3 04/11] arm64, mm: Move generic mmap layout functions to mm

2019-04-17 Thread Alex Ghiti

On 4/18/19 1:17 AM, Kees Cook wrote:

(

On Wed, Apr 17, 2019 at 12:27 AM Alexandre Ghiti  wrote:

arm64 handles top-down mmap layout in a way that can be easily reused
by other architectures, so make it available in mm.
It then introduces a new config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
that can be set by other architectures to benefit from those functions.
Note that this new config depends on MMU being enabled, if selected
without MMU support, a warning will be thrown.

Suggested-by: Christoph Hellwig 
Signed-off-by: Alexandre Ghiti 
---
  arch/Kconfig   |  8 
  arch/arm64/Kconfig |  1 +
  arch/arm64/include/asm/processor.h |  2 -
  arch/arm64/mm/mmap.c   | 76 --
  kernel/sysctl.c|  6 ++-
  mm/util.c  | 74 -
  6 files changed, 86 insertions(+), 81 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 3368786a..7c8965c64590 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -684,6 +684,14 @@ config HAVE_ARCH_COMPAT_MMAP_BASES
   and vice-versa 32-bit applications to call 64-bit mmap().
   Required for applications doing different bitness syscalls.

+# This allows to use a set of generic functions to determine mmap base
+# address by giving priority to top-down scheme only if the process
+# is not in legacy mode (compat task, unlimited stack size or
+# sysctl_legacy_va_layout).
+config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+   bool
+   depends on MMU

I'd prefer the comment were moved to the help text. I would include
any details about what the arch still needs to define. For example
right now, I think STACK_RND_MASK is still needed. (Though I think a
common one could be added for this series too...)



STACK_RND_MASK may be defined by the architecture or it can use the generic
definition in mm/util.c that I moved in patch 1/11 of this series. 
That's why I moved

randomize_stack_top in this file.
Regarding the help text, I agree that it does not seem to be frequent to 
place
comment above config like that, I'll let Christoph and you decide what's 
best. And I'll

add the possibility for the arch to define its own STACK_RND_MASK.





+
  config HAVE_COPY_THREAD_TLS
 bool
 help
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7e34b9eba5de..670719a26b45 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -66,6 +66,7 @@ config ARM64
 select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 5 || CC_IS_CLANG
 select ARCH_SUPPORTS_NUMA_BALANCING
 select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
+   select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
 select ARCH_WANT_FRAME_POINTERS
 select ARCH_HAS_UBSAN_SANITIZE_ALL
 select ARM_AMBA
diff --git a/arch/arm64/include/asm/processor.h 
b/arch/arm64/include/asm/processor.h
index 5d9ce62bdebd..4de2a2fd605a 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -274,8 +274,6 @@ static inline void spin_lock_prefetch(const void *ptr)
  "nop") : : "p" (ptr));
  }

-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
  #endif

  extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index ac89686c4af8..c74224421216 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -31,82 +31,6 @@

  #include 

-/*
- * Leave enough space between the mmap area and the stack to honour ulimit in
- * the face of randomisation.
- */

This comment goes missing in the move...



True, I should have left it, sorry about that.





-#define MIN_GAP (SZ_128M)
-#define MAX_GAP(STACK_TOP/6*5)
-
-static int mmap_is_legacy(struct rlimit *rlim_stack)
-{
-   if (current->personality & ADDR_COMPAT_LAYOUT)
-   return 1;
-
-   if (rlim_stack->rlim_cur == RLIM_INFINITY)
-   return 1;
-
-   return sysctl_legacy_va_layout;
-}
-
-unsigned long arch_mmap_rnd(void)
-{
-   unsigned long rnd;
-
-#ifdef CONFIG_COMPAT
-   if (is_compat_task())
-   rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
-   else
-#endif
-   rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-   return rnd << PAGE_SHIFT;
-}
-
-static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
-{
-   unsigned long gap = rlim_stack->rlim_cur;
-   unsigned long pad = stack_guard_gap;
-
-   /* Account for stack randomization if necessary */
-   if (current->flags & PF_RANDOMIZE)
-   pad += (STACK_RND_MASK << PAGE_SHIFT);
-
-   /* Values close to RLIM_INFINITY can overflow. */
-   if (gap + pad > gap)
-   gap += pad;
-
-   if (gap < MIN_GAP)
-   gap = MIN_GAP;
-   else if (gap > MAX_GAP)
-   gap = MAX_GAP;
-
-   return PAGE_ALIGN(STACK_TOP - gap - rnd);
-}
-
-/*
- * This 

Re: [PATCH v3 07/11] arm: Use generic mmap top-down layout

2019-04-17 Thread Kees Cook
On Wed, Apr 17, 2019 at 12:30 AM Alexandre Ghiti  wrote:
>
> arm uses a top-down mmap layout by default that exactly fits the generic
> functions, so get rid of arch specific code and use the generic version
> by selecting ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT.
>
> Signed-off-by: Alexandre Ghiti 

Acked-by: Kees Cook 

-Kees

> ---
>  arch/arm/Kconfig |  1 +
>  arch/arm/include/asm/processor.h |  2 --
>  arch/arm/mm/mmap.c   | 62 
>  3 files changed, 1 insertion(+), 64 deletions(-)
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 850b4805e2d1..f8f603da181f 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -28,6 +28,7 @@ config ARM
> select ARCH_SUPPORTS_ATOMIC_RMW
> select ARCH_USE_BUILTIN_BSWAP
> select ARCH_USE_CMPXCHG_LOCKREF
> +   select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> select ARCH_WANT_IPC_PARSE_VERSION
> select BUILDTIME_EXTABLE_SORT if MMU
> select CLONE_BACKWARDS
> diff --git a/arch/arm/include/asm/processor.h 
> b/arch/arm/include/asm/processor.h
> index 57fe73ea0f72..944ef1fb1237 100644
> --- a/arch/arm/include/asm/processor.h
> +++ b/arch/arm/include/asm/processor.h
> @@ -143,8 +143,6 @@ static inline void prefetchw(const void *ptr)
>  #endif
>  #endif
>
> -#define HAVE_ARCH_PICK_MMAP_LAYOUT
> -
>  #endif
>
>  #endif /* __ASM_ARM_PROCESSOR_H */
> diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
> index 0b94b674aa91..b8d912ac9e61 100644
> --- a/arch/arm/mm/mmap.c
> +++ b/arch/arm/mm/mmap.c
> @@ -17,43 +17,6 @@
> addr)+SHMLBA-1)&~(SHMLBA-1)) +  \
>  (((pgoff)<
> -/* gap between mmap and stack */
> -#define MIN_GAP(128*1024*1024UL)
> -#define MAX_GAP((STACK_TOP)/6*5)
> -#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))
> -
> -static int mmap_is_legacy(struct rlimit *rlim_stack)
> -{
> -   if (current->personality & ADDR_COMPAT_LAYOUT)
> -   return 1;
> -
> -   if (rlim_stack->rlim_cur == RLIM_INFINITY)
> -   return 1;
> -
> -   return sysctl_legacy_va_layout;
> -}
> -
> -static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
> -{
> -   unsigned long gap = rlim_stack->rlim_cur;
> -   unsigned long pad = stack_guard_gap;
> -
> -   /* Account for stack randomization if necessary */
> -   if (current->flags & PF_RANDOMIZE)
> -   pad += (STACK_RND_MASK << PAGE_SHIFT);
> -
> -   /* Values close to RLIM_INFINITY can overflow. */
> -   if (gap + pad > gap)
> -   gap += pad;
> -
> -   if (gap < MIN_GAP)
> -   gap = MIN_GAP;
> -   else if (gap > MAX_GAP)
> -   gap = MAX_GAP;
> -
> -   return PAGE_ALIGN(STACK_TOP - gap - rnd);
> -}
> -
>  /*
>   * We need to ensure that shared mappings are correctly aligned to
>   * avoid aliasing issues with VIPT caches.  We need to ensure that
> @@ -181,31 +144,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const 
> unsigned long addr0,
> return addr;
>  }
>
> -unsigned long arch_mmap_rnd(void)
> -{
> -   unsigned long rnd;
> -
> -   rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
> -
> -   return rnd << PAGE_SHIFT;
> -}
> -
> -void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
> -{
> -   unsigned long random_factor = 0UL;
> -
> -   if (current->flags & PF_RANDOMIZE)
> -   random_factor = arch_mmap_rnd();
> -
> -   if (mmap_is_legacy(rlim_stack)) {
> -   mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
> -   mm->get_unmapped_area = arch_get_unmapped_area;
> -   } else {
> -   mm->mmap_base = mmap_base(random_factor, rlim_stack);
> -   mm->get_unmapped_area = arch_get_unmapped_area_topdown;
> -   }
> -}
> -
>  /*
>   * You really shouldn't be using read() or write() on /dev/mem.  This
>   * might go away in the future.
> --
> 2.20.1
>


-- 
Kees Cook


Re: [PATCH v3 05/11] arm: Properly account for stack randomization and stack guard gap

2019-04-17 Thread Kees Cook
On Wed, Apr 17, 2019 at 12:28 AM Alexandre Ghiti  wrote:
>
> This commit takes care of stack randomization and stack guard gap when
> computing mmap base address and checks if the task asked for randomization.
> This fixes the problem uncovered and not fixed for arm here:
> https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1429066.html

Please use the official archive instead. This includes headers, linked
patches, etc:
https://lkml.kernel.org/r/20170622200033.25714-1-r...@redhat.com

> Signed-off-by: Alexandre Ghiti 
> ---
>  arch/arm/mm/mmap.c | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
> index f866870db749..bff3d00bda5b 100644
> --- a/arch/arm/mm/mmap.c
> +++ b/arch/arm/mm/mmap.c
> @@ -18,8 +18,9 @@
>  (((pgoff)<
>  /* gap between mmap and stack */
> -#define MIN_GAP (128*1024*1024UL)
> -#define MAX_GAP ((TASK_SIZE)/6*5)
> +#define MIN_GAP(128*1024*1024UL)

Might as well fix this up as SIZE_128M

> +#define MAX_GAP((TASK_SIZE)/6*5)
> +#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))

STACK_RND_MASK is already defined so you don't need to add it here, yes?

>  static int mmap_is_legacy(struct rlimit *rlim_stack)
>  {
> @@ -35,6 +36,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack)
>  static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
>  {
> unsigned long gap = rlim_stack->rlim_cur;
> +   unsigned long pad = stack_guard_gap;
> +
> +   /* Account for stack randomization if necessary */
> +   if (current->flags & PF_RANDOMIZE)
> +   pad += (STACK_RND_MASK << PAGE_SHIFT);
> +
> +   /* Values close to RLIM_INFINITY can overflow. */
> +   if (gap + pad > gap)
> +   gap += pad;
>
> if (gap < MIN_GAP)
> gap = MIN_GAP;
> --
> 2.20.1
>

But otherwise, yes:

Acked-by: Kees Cook 

--
Kees Cook


Re: [PATCH v3 11/11] riscv: Make mmap allocation top-down by default

2019-04-17 Thread Kees Cook
On Wed, Apr 17, 2019 at 12:34 AM Alexandre Ghiti  wrote:
>
> In order to avoid wasting user address space by using bottom-up mmap
> allocation scheme, prefer top-down scheme when possible.
>
> Before:
> root@qemuriscv64:~# cat /proc/self/maps
> 0001-00016000 r-xp  fe:00 6389   /bin/cat.coreutils
> 00016000-00017000 r--p 5000 fe:00 6389   /bin/cat.coreutils
> 00017000-00018000 rw-p 6000 fe:00 6389   /bin/cat.coreutils
> 00018000-00039000 rw-p  00:00 0  [heap]
> 156000-16d000 r-xp  fe:00 7193   /lib/ld-2.28.so
> 16d000-16e000 r--p 00016000 fe:00 7193   /lib/ld-2.28.so
> 16e000-16f000 rw-p 00017000 fe:00 7193   /lib/ld-2.28.so
> 16f000-17 rw-p  00:00 0
> 17-172000 r-xp  00:00 0  [vdso]
> 174000-176000 rw-p  00:00 0
> 176000-1555674000 r-xp  fe:00 7187   /lib/libc-2.28.so
> 1555674000-1555678000 r--p 000fd000 fe:00 7187   /lib/libc-2.28.so
> 1555678000-155567a000 rw-p 00101000 fe:00 7187   /lib/libc-2.28.so
> 155567a000-15556a rw-p  00:00 0
> 3fffb9-3fffbb1000 rw-p  00:00 0  [stack]
>
> After:
> root@qemuriscv64:~# cat /proc/self/maps
> 0001-00016000 r-xp  fe:00 6389   /bin/cat.coreutils
> 00016000-00017000 r--p 5000 fe:00 6389   /bin/cat.coreutils
> 00017000-00018000 rw-p 6000 fe:00 6389   /bin/cat.coreutils
> 00018000-00039000 rw-p  00:00 0  [heap]
> 3ff7eb6000-3ff7ed8000 rw-p  00:00 0
> 3ff7ed8000-3ff7fd6000 r-xp  fe:00 7187   /lib/libc-2.28.so
> 3ff7fd6000-3ff7fda000 r--p 000fd000 fe:00 7187   /lib/libc-2.28.so
> 3ff7fda000-3ff7fdc000 rw-p 00101000 fe:00 7187   /lib/libc-2.28.so
> 3ff7fdc000-3ff7fe2000 rw-p  00:00 0
> 3ff7fe4000-3ff7fe6000 r-xp  00:00 0  [vdso]
> 3ff7fe6000-3ff7ffd000 r-xp  fe:00 7193   /lib/ld-2.28.so
> 3ff7ffd000-3ff7ffe000 r--p 00016000 fe:00 7193   /lib/ld-2.28.so
> 3ff7ffe000-3ff7fff000 rw-p 00017000 fe:00 7193   /lib/ld-2.28.so
> 3ff7fff000-3ff800 rw-p  00:00 0
> 3fff888000-3fff8a9000 rw-p  00:00 0  [stack]
>
> Signed-off-by: Alexandre Ghiti 
> Reviewed-by: Christoph Hellwig 

Reviewed-by: Kees Cook 

-Kees

> ---
>  arch/riscv/Kconfig | 11 +++
>  1 file changed, 11 insertions(+)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index eb56c82d8aa1..f5897e0dbc1c 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -49,6 +49,17 @@ config RISCV
> select GENERIC_IRQ_MULTI_HANDLER
> select ARCH_HAS_PTE_SPECIAL
> select HAVE_EBPF_JIT if 64BIT
> +   select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> +   select HAVE_ARCH_MMAP_RND_BITS
> +
> +config ARCH_MMAP_RND_BITS_MIN
> +   default 18
> +
> +# max bits determined by the following formula:
> +#  VA_BITS - PAGE_SHIFT - 3
> +config ARCH_MMAP_RND_BITS_MAX
> +   default 33 if 64BIT # SV48 based
> +   default 18
>
>  config MMU
> def_bool y
> --
> 2.20.1
>


-- 
Kees Cook


Re: [PATCH v3 08/11] mips: Properly account for stack randomization and stack guard gap

2019-04-17 Thread Kees Cook
On Wed, Apr 17, 2019 at 12:31 AM Alexandre Ghiti  wrote:
>
> This commit takes care of stack randomization and stack guard gap when
> computing mmap base address and checks if the task asked for randomization.
> This fixes the problem uncovered and not fixed for mips here:
> https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1429066.html

same URL change here please...

>
> Signed-off-by: Alexandre Ghiti 

Acked-by: Kees Cook 

-Kees

> ---
>  arch/mips/mm/mmap.c | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
> index 2f616ebeb7e0..3ff82c6f7e24 100644
> --- a/arch/mips/mm/mmap.c
> +++ b/arch/mips/mm/mmap.c
> @@ -21,8 +21,9 @@ unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane 
> caches */
>  EXPORT_SYMBOL(shm_align_mask);
>
>  /* gap between mmap and stack */
> -#define MIN_GAP (128*1024*1024UL)
> -#define MAX_GAP ((TASK_SIZE)/6*5)
> +#define MIN_GAP(128*1024*1024UL)
> +#define MAX_GAP((TASK_SIZE)/6*5)
> +#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))
>
>  static int mmap_is_legacy(struct rlimit *rlim_stack)
>  {
> @@ -38,6 +39,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack)
>  static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
>  {
> unsigned long gap = rlim_stack->rlim_cur;
> +   unsigned long pad = stack_guard_gap;
> +
> +   /* Account for stack randomization if necessary */
> +   if (current->flags & PF_RANDOMIZE)
> +   pad += (STACK_RND_MASK << PAGE_SHIFT);
> +
> +   /* Values close to RLIM_INFINITY can overflow. */
> +   if (gap + pad > gap)
> +   gap += pad;
>
> if (gap < MIN_GAP)
> gap = MIN_GAP;
> --
> 2.20.1
>

-- 
Kees Cook


Re: [PATCH v2] clk: hi3660: Mark clk_gate_ufs_subsys as critical

2019-04-17 Thread Leo Yan
Hi Michael, Stephen,

On Wed, Mar 20, 2019 at 06:05:08PM +0800, Leo Yan wrote:
> clk_gate_ufs_subsys is a system bus clock, turning off it will
> introduce lockup issue during system suspend flow.  Let's mark
> clk_gate_ufs_subsys as critical clock, thus keeps it on during
> system suspend and resume.

Could you pick up this patch?  Or should I resend it?

Thanks,
Leo Yan

> Fixes: d374e6fd5088 ("clk: hisilicon: Add clock driver for hi3660 SoC")
> Cc: sta...@vger.kernel.org
> Cc: Zhong Kaihua 
> Cc: John Stultz 
> Cc: Zhangfei Gao 
> Suggested-by: Dong Zhang 
> Signed-off-by: Leo Yan 
> ---
>  drivers/clk/hisilicon/clk-hi3660.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/clk/hisilicon/clk-hi3660.c 
> b/drivers/clk/hisilicon/clk-hi3660.c
> index f40419959656..794eeff0d5d2 100644
> --- a/drivers/clk/hisilicon/clk-hi3660.c
> +++ b/drivers/clk/hisilicon/clk-hi3660.c
> @@ -163,8 +163,12 @@ static const struct hisi_gate_clock 
> hi3660_crgctrl_gate_sep_clks[] = {
> "clk_isp_snclk_mux", CLK_SET_RATE_PARENT, 0x50, 17, 0, },
>   { HI3660_CLK_GATE_ISP_SNCLK2, "clk_gate_isp_snclk2",
> "clk_isp_snclk_mux", CLK_SET_RATE_PARENT, 0x50, 18, 0, },
> + /*
> +  * clk_gate_ufs_subsys is a system bus clock, mark it as critical
> +  * clock and keep it on for system suspend and resume.
> +  */
>   { HI3660_CLK_GATE_UFS_SUBSYS, "clk_gate_ufs_subsys", "clk_div_sysbus",
> -   CLK_SET_RATE_PARENT, 0x50, 21, 0, },
> +   CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, 0x50, 21, 0, },
>   { HI3660_PCLK_GATE_DSI0, "pclk_gate_dsi0", "clk_div_cfgbus",
> CLK_SET_RATE_PARENT, 0x50, 28, 0, },
>   { HI3660_PCLK_GATE_DSI1, "pclk_gate_dsi1", "clk_div_cfgbus",
> -- 
> 2.17.1
> 


Re: [PATCH v3 09/11] mips: Use STACK_TOP when computing mmap base address

2019-04-17 Thread Kees Cook
On Wed, Apr 17, 2019 at 12:32 AM Alexandre Ghiti  wrote:
>
> mmap base address must be computed wrt stack top address, using TASK_SIZE
> is wrong since STACK_TOP and TASK_SIZE are not equivalent.
>
> Signed-off-by: Alexandre Ghiti 

Acked-by: Kees Cook 

-Kees

> ---
>  arch/mips/mm/mmap.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
> index 3ff82c6f7e24..ffbe69f3a7d9 100644
> --- a/arch/mips/mm/mmap.c
> +++ b/arch/mips/mm/mmap.c
> @@ -22,7 +22,7 @@ EXPORT_SYMBOL(shm_align_mask);
>
>  /* gap between mmap and stack */
>  #define MIN_GAP(128*1024*1024UL)
> -#define MAX_GAP((TASK_SIZE)/6*5)
> +#define MAX_GAP((STACK_TOP)/6*5)
>  #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))
>
>  static int mmap_is_legacy(struct rlimit *rlim_stack)
> @@ -54,7 +54,7 @@ static unsigned long mmap_base(unsigned long rnd, struct 
> rlimit *rlim_stack)
> else if (gap > MAX_GAP)
> gap = MAX_GAP;
>
> -   return PAGE_ALIGN(TASK_SIZE - gap - rnd);
> +   return PAGE_ALIGN(STACK_TOP - gap - rnd);
>  }
>
>  #define COLOUR_ALIGN(addr, pgoff)  \
> --
> 2.20.1
>


-- 
Kees Cook


Re: [PATCH v3 10/11] mips: Use generic mmap top-down layout

2019-04-17 Thread Kees Cook
On Wed, Apr 17, 2019 at 12:33 AM Alexandre Ghiti  wrote:
>
> mips uses a top-down layout by default that fits the generic functions.
> At the same time, this commit allows to fix problem uncovered
> and not fixed for mips here:
> https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1429066.html
>
> Signed-off-by: Alexandre Ghiti 

Acked-by: Kees Cook 

-Kees

> ---
>  arch/mips/Kconfig |  1 +
>  arch/mips/include/asm/processor.h |  5 ---
>  arch/mips/mm/mmap.c   | 67 ---
>  3 files changed, 1 insertion(+), 72 deletions(-)
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 4a5f5b0ee9a9..ec2f07561e4d 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -14,6 +14,7 @@ config MIPS
> select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
> select ARCH_USE_QUEUED_RWLOCKS
> select ARCH_USE_QUEUED_SPINLOCKS
> +   select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> select ARCH_WANT_IPC_PARSE_VERSION
> select BUILDTIME_EXTABLE_SORT
> select CLONE_BACKWARDS
> diff --git a/arch/mips/include/asm/processor.h 
> b/arch/mips/include/asm/processor.h
> index aca909bd7841..fba18d4a9190 100644
> --- a/arch/mips/include/asm/processor.h
> +++ b/arch/mips/include/asm/processor.h
> @@ -29,11 +29,6 @@
>
>  extern unsigned int vced_count, vcei_count;
>
> -/*
> - * MIPS does have an arch_pick_mmap_layout()
> - */
> -#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
> -
>  #ifdef CONFIG_32BIT
>  #ifdef CONFIG_KVM_GUEST
>  /* User space process size is limited to 1GB in KVM Guest Mode */
> diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
> index ffbe69f3a7d9..61e65a69bb09 100644
> --- a/arch/mips/mm/mmap.c
> +++ b/arch/mips/mm/mmap.c
> @@ -20,43 +20,6 @@
>  unsigned long shm_align_mask = PAGE_SIZE - 1;  /* Sane caches */
>  EXPORT_SYMBOL(shm_align_mask);
>
> -/* gap between mmap and stack */
> -#define MIN_GAP(128*1024*1024UL)
> -#define MAX_GAP((STACK_TOP)/6*5)
> -#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))
> -
> -static int mmap_is_legacy(struct rlimit *rlim_stack)
> -{
> -   if (current->personality & ADDR_COMPAT_LAYOUT)
> -   return 1;
> -
> -   if (rlim_stack->rlim_cur == RLIM_INFINITY)
> -   return 1;
> -
> -   return sysctl_legacy_va_layout;
> -}
> -
> -static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
> -{
> -   unsigned long gap = rlim_stack->rlim_cur;
> -   unsigned long pad = stack_guard_gap;
> -
> -   /* Account for stack randomization if necessary */
> -   if (current->flags & PF_RANDOMIZE)
> -   pad += (STACK_RND_MASK << PAGE_SHIFT);
> -
> -   /* Values close to RLIM_INFINITY can overflow. */
> -   if (gap + pad > gap)
> -   gap += pad;
> -
> -   if (gap < MIN_GAP)
> -   gap = MIN_GAP;
> -   else if (gap > MAX_GAP)
> -   gap = MAX_GAP;
> -
> -   return PAGE_ALIGN(STACK_TOP - gap - rnd);
> -}
> -
>  #define COLOUR_ALIGN(addr, pgoff)  \
> addr) + shm_align_mask) & ~shm_align_mask) +\
>  (((pgoff) << PAGE_SHIFT) & shm_align_mask))
> @@ -154,36 +117,6 @@ unsigned long arch_get_unmapped_area_topdown(struct file 
> *filp,
> addr0, len, pgoff, flags, DOWN);
>  }
>
> -unsigned long arch_mmap_rnd(void)
> -{
> -   unsigned long rnd;
> -
> -#ifdef CONFIG_COMPAT
> -   if (TASK_IS_32BIT_ADDR)
> -   rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
> -   else
> -#endif /* CONFIG_COMPAT */
> -   rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
> -
> -   return rnd << PAGE_SHIFT;
> -}
> -
> -void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
> -{
> -   unsigned long random_factor = 0UL;
> -
> -   if (current->flags & PF_RANDOMIZE)
> -   random_factor = arch_mmap_rnd();
> -
> -   if (mmap_is_legacy(rlim_stack)) {
> -   mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
> -   mm->get_unmapped_area = arch_get_unmapped_area;
> -   } else {
> -   mm->mmap_base = mmap_base(random_factor, rlim_stack);
> -   mm->get_unmapped_area = arch_get_unmapped_area_topdown;
> -   }
> -}
> -
>  static inline unsigned long brk_rnd(void)
>  {
> unsigned long rnd = get_random_long();
> --
> 2.20.1
>


-- 
Kees Cook


Re: [PATCH] pci/switchtec: fix stream_open.cocci warnings (fwd)

2019-04-17 Thread Julia Lawall



On Wed, 17 Apr 2019, Bjorn Helgaas wrote:

> On Sat, Apr 13, 2019 at 06:50:57PM +0200, Julia Lawall wrote:
> > Hello,
> >
> > Kirill will explain about this issue.
> >
> > julia
> >
> > -- Forwarded message --
> > Date: Sat, 13 Apr 2019 11:22:51 +0800
> > From: kbuild test robot 
> > To: kbu...@01.org
> > Cc: Julia Lawall 
> > Subject: [PATCH] pci/switchtec: fix stream_open.cocci warnings
> >
> > CC: kbuild-...@01.org
> > TO: Sebastian Andrzej Siewior 
> > CC: Kurt Schwemmer 
> > CC: Logan Gunthorpe 
> > CC: Bjorn Helgaas 
> > CC: linux-...@vger.kernel.org
> > CC: linux-kernel@vger.kernel.org
> >
> > From: kbuild test robot 
> >
> > drivers/pci/switch/switchtec.c:395:1-17: ERROR: switchtec_fops: .read() can 
> > deadlock .write(); change nonseekable_open -> stream_open to fix.
> >
> > Generated by: scripts/coccinelle/api/stream_open.cocci
> >
> > Fixes: 8a29a3bae2a2 ("pci/switchtec: Don't use completion's wait queue")
> > Signed-off-by: kbuild test robot 
>
> Based on Kirill's subsequent email saying this is already queued to
> the merge window, I assume I need to do nothing here.
>
> I think a signed-off-by from a robot, i.e., not from a real person, is
> meaningless, and I don't think I would personally accept it.  It's
> certainly OK to indicate that a patch was auto-generated, but I think
> a real person still needs to take responsibility for it.
>
> Documentation/process/submitting-patches.rst says it must contain a
> real name (no pseudonyms or anonymous contributions), and I don't
> think a robot fits in the spirit of that.
>
> I see that
> https://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git/commit/?id=8a29a3bae2a2
> (mentioned below) does have a good signed-off-by from Sebastian, but
> that's not *this* patch, so I don't know what's what.

Normally, for these robot generated patches, when I approve them, I put my
own sign off, but under the robot one, since the robot has put a From
line.  In this case, I handed the problem off to Kirill, so I didn't do
that.  I agree that it would be good for Kirill to sign off on it.

julia



>
> Bjorn
>
> > ---
> >
> > tree:   
> > https://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git 
> > linux-5.0.y-rt-rebase
> > head:   794c294ae4483c240429c25a0d18e272e92c94de
> > commit: 8a29a3bae2a2dfb0116cd8791d9700515d6e765e [154/311] pci/switchtec: 
> > Don't use completion's wait queue
> > :: branch date: 7 hours ago
> > :: commit date: 7 hours ago
> >
> > Please take the patch only if it's a positive warning. Thanks!
> >
> >  switchtec.c |2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > --- a/drivers/pci/switch/switchtec.c
> > +++ b/drivers/pci/switch/switchtec.c
> > @@ -392,7 +392,7 @@ static int switchtec_dev_open(struct ino
> > return PTR_ERR(stuser);
> >
> > filp->private_data = stuser;
> > -   nonseekable_open(inode, filp);
> > +   stream_open(inode, filp);
> >
> > dev_dbg(>dev, "%s: %p\n", __func__, stuser);
> >
>


linux-next: manual merge of the phy-next tree with the qcom tree

2019-04-17 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the phy-next tree got a conflict in:

  Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt

between commit:

  369b89366a3d ("dt-bindings: phy-qcom-qmp: Tweak qcom,msm8998-qmp-ufs-phy")

from the qcom tree and commit:

  2815588ea64b ("dt-bindings: phy-qcom-qmp: Add qcom,msm8998-qmp-pcie-phy")

from the phy-next tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
index d882e40c3bd9,5fca57b12534..
--- a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
+++ b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
@@@ -69,8 -73,9 +73,10 @@@ Required properties
"phy", "common".
For "qcom,msm8998-qmp-usb3-phy" must contain
"phy", "common".
 -  For "qcom,msm8998-qmp-ufs-phy": no resets are listed.
 +  For "qcom,msm8998-qmp-ufs-phy": must contain:
 +  "ufsphy".
+   For "qcom,msm8998-qmp-pcie-phy" must contain:
+   "phy", "common".
For "qcom,sdm845-qmp-usb3-phy" must contain:
"phy", "common".
For "qcom,sdm845-qmp-usb3-uni-phy" must contain:


pgphH817oMZCw.pgp
Description: OpenPGP digital signature


Re: [PATCH V2 2/2] arm64/mm: Enable memory hot remove

2019-04-17 Thread Anshuman Khandual
On 04/17/2019 11:09 PM, Mark Rutland wrote:
> On Wed, Apr 17, 2019 at 10:15:35PM +0530, Anshuman Khandual wrote:
>> On 04/17/2019 07:51 PM, Mark Rutland wrote:
>>> On Wed, Apr 17, 2019 at 03:28:18PM +0530, Anshuman Khandual wrote:
 On 04/15/2019 07:18 PM, Mark Rutland wrote:
> On Sun, Apr 14, 2019 at 11:29:13AM +0530, Anshuman Khandual wrote:
> 
>> +spin_unlock(_mm.page_table_lock);
>
> What precisely is the page_table_lock intended to protect?

 Concurrent modification to kernel page table (init_mm) while clearing 
 entries.
>>>
>>> Concurrent modification by what code?
>>>
>>> If something else can *modify* the portion of the table that we're
>>> manipulating, then I don't see how we can safely walk the table up to
>>> this point without holding the lock, nor how we can safely add memory.
>>>
>>> Even if this is to protect something else which *reads* the tables,
>>> other code in arm64 which modifies the kernel page tables doesn't take
>>> the lock.
>>>
>>> Usually, if you can do a lockless walk you have to verify that things
>>> didn't change once you've taken the lock, but we don't follow that
>>> pattern here.
>>>
>>> As things stand it's not clear to me whether this is necessary or
>>> sufficient.
>>
>> Hence lets take more conservative approach and wrap the entire process of
>> remove_pagetable() under init_mm.page_table_lock which looks safe unless
>> in the worst case when free_pages() gets stuck for some reason in which
>> case we have bigger memory problem to deal with than a soft lock up.
> 
> Sorry, but I'm not happy with _any_ solution until we understand where
> and why we need to take the init_mm ptl, and have made some effort to
> ensure that the kernel correctly does so elsewhere. It is not sufficient
> to consider this code in isolation.

We will have to take the kernel page table lock to prevent assumption regarding
present or future possible kernel VA space layout. Wrapping around the entire
remove_pagetable() will be at coarse granularity but I dont see why it should
not sufficient atleast from this particular tear down operation regardless of
how this might affect other kernel pgtable walkers.

IIUC your concern is regarding other parts of kernel code (arm64/generic) which
assume that kernel page table wont be changing and hence they normally walk the
table without holding pgtable lock. Hence those current pgtabe walker will be
affected after this change.

> 
> IIUC, before this patch we never clear non-leaf entries in the kernel
> page tables, so readers don't presently need to take the ptl in order to
> safely walk down to a leaf entry.

Got it. Will look into this.

> 
> For example, the arm64 ptdump code never takes the ptl, and as of this
> patch it will blow up if it races with a hot-remove, regardless of
> whether the hot-remove code itself holds the ptl.

Got it. Are there there more such examples where this can be problematic. I
will be happy to investigate all such places and change/add locking scheme
in there to make them work with memory hot remove.

> 
> Note that the same applies to the x86 ptdump code; we cannot assume that
> just because x86 does something that it happens to be correct.

I understand. Will look into other non-x86 platforms as well on how they are
dealing with this.

> 
> I strongly suspect there are other cases that would fall afoul of this,
> in both arm64 and generic code.

Will start looking into all such possible cases both on arm64 and generic.
Mean while more such pointers would be really helpful.


Re: [PATCH v3 06/11] arm: Use STACK_TOP when computing mmap base address

2019-04-17 Thread Kees Cook
On Wed, Apr 17, 2019 at 12:29 AM Alexandre Ghiti  wrote:
>
> mmap base address must be computed wrt stack top address, using TASK_SIZE
> is wrong since STACK_TOP and TASK_SIZE are not equivalent.
>
> Signed-off-by: Alexandre Ghiti 
> ---
>  arch/arm/mm/mmap.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
> index bff3d00bda5b..0b94b674aa91 100644
> --- a/arch/arm/mm/mmap.c
> +++ b/arch/arm/mm/mmap.c
> @@ -19,7 +19,7 @@
>
>  /* gap between mmap and stack */
>  #define MIN_GAP(128*1024*1024UL)
> -#define MAX_GAP((TASK_SIZE)/6*5)
> +#define MAX_GAP((STACK_TOP)/6*5)

Parens around STACK_TOP aren't needed, but you'll be removing it
entirely, so I can't complain. ;)

>  #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))
>
>  static int mmap_is_legacy(struct rlimit *rlim_stack)
> @@ -51,7 +51,7 @@ static unsigned long mmap_base(unsigned long rnd, struct 
> rlimit *rlim_stack)
> else if (gap > MAX_GAP)
> gap = MAX_GAP;
>
> -   return PAGE_ALIGN(TASK_SIZE - gap - rnd);
> +   return PAGE_ALIGN(STACK_TOP - gap - rnd);
>  }
>
>  /*
> --
> 2.20.1
>

Acked-by: Kees Cook 

-- 
Kees Cook


Re: [PATCH v3 04/11] arm64, mm: Move generic mmap layout functions to mm

2019-04-17 Thread Kees Cook
(

On Wed, Apr 17, 2019 at 12:27 AM Alexandre Ghiti  wrote:
>
> arm64 handles top-down mmap layout in a way that can be easily reused
> by other architectures, so make it available in mm.
> It then introduces a new config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> that can be set by other architectures to benefit from those functions.
> Note that this new config depends on MMU being enabled, if selected
> without MMU support, a warning will be thrown.
>
> Suggested-by: Christoph Hellwig 
> Signed-off-by: Alexandre Ghiti 
> ---
>  arch/Kconfig   |  8 
>  arch/arm64/Kconfig |  1 +
>  arch/arm64/include/asm/processor.h |  2 -
>  arch/arm64/mm/mmap.c   | 76 --
>  kernel/sysctl.c|  6 ++-
>  mm/util.c  | 74 -
>  6 files changed, 86 insertions(+), 81 deletions(-)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 3368786a..7c8965c64590 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -684,6 +684,14 @@ config HAVE_ARCH_COMPAT_MMAP_BASES
>   and vice-versa 32-bit applications to call 64-bit mmap().
>   Required for applications doing different bitness syscalls.
>
> +# This allows to use a set of generic functions to determine mmap base
> +# address by giving priority to top-down scheme only if the process
> +# is not in legacy mode (compat task, unlimited stack size or
> +# sysctl_legacy_va_layout).
> +config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> +   bool
> +   depends on MMU

I'd prefer the comment were moved to the help text. I would include
any details about what the arch still needs to define. For example
right now, I think STACK_RND_MASK is still needed. (Though I think a
common one could be added for this series too...)

> +
>  config HAVE_COPY_THREAD_TLS
> bool
> help
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 7e34b9eba5de..670719a26b45 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -66,6 +66,7 @@ config ARM64
> select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 5 || CC_IS_CLANG
> select ARCH_SUPPORTS_NUMA_BALANCING
> select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
> +   select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> select ARCH_WANT_FRAME_POINTERS
> select ARCH_HAS_UBSAN_SANITIZE_ALL
> select ARM_AMBA
> diff --git a/arch/arm64/include/asm/processor.h 
> b/arch/arm64/include/asm/processor.h
> index 5d9ce62bdebd..4de2a2fd605a 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -274,8 +274,6 @@ static inline void spin_lock_prefetch(const void *ptr)
>  "nop") : : "p" (ptr));
>  }
>
> -#define HAVE_ARCH_PICK_MMAP_LAYOUT
> -
>  #endif
>
>  extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
> diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
> index ac89686c4af8..c74224421216 100644
> --- a/arch/arm64/mm/mmap.c
> +++ b/arch/arm64/mm/mmap.c
> @@ -31,82 +31,6 @@
>
>  #include 
>
> -/*
> - * Leave enough space between the mmap area and the stack to honour ulimit in
> - * the face of randomisation.
> - */

This comment goes missing in the move...

> -#define MIN_GAP (SZ_128M)
> -#define MAX_GAP(STACK_TOP/6*5)
> -
> -static int mmap_is_legacy(struct rlimit *rlim_stack)
> -{
> -   if (current->personality & ADDR_COMPAT_LAYOUT)
> -   return 1;
> -
> -   if (rlim_stack->rlim_cur == RLIM_INFINITY)
> -   return 1;
> -
> -   return sysctl_legacy_va_layout;
> -}
> -
> -unsigned long arch_mmap_rnd(void)
> -{
> -   unsigned long rnd;
> -
> -#ifdef CONFIG_COMPAT
> -   if (is_compat_task())
> -   rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
> -   else
> -#endif
> -   rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
> -   return rnd << PAGE_SHIFT;
> -}
> -
> -static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
> -{
> -   unsigned long gap = rlim_stack->rlim_cur;
> -   unsigned long pad = stack_guard_gap;
> -
> -   /* Account for stack randomization if necessary */
> -   if (current->flags & PF_RANDOMIZE)
> -   pad += (STACK_RND_MASK << PAGE_SHIFT);
> -
> -   /* Values close to RLIM_INFINITY can overflow. */
> -   if (gap + pad > gap)
> -   gap += pad;
> -
> -   if (gap < MIN_GAP)
> -   gap = MIN_GAP;
> -   else if (gap > MAX_GAP)
> -   gap = MAX_GAP;
> -
> -   return PAGE_ALIGN(STACK_TOP - gap - rnd);
> -}
> -
> -/*
> - * This function, called very early during the creation of a new process VM
> - * image, sets up which VM layout function to use:
> - */
> -void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
> -{
> -   unsigned long random_factor = 0UL;
> -
> -   if (current->flags & PF_RANDOMIZE)
> -

Re: [PATCH v3 03/11] arm64: Consider stack randomization for mmap base only when necessary

2019-04-17 Thread Alex Ghiti

On 4/18/19 12:37 AM, Kees Cook wrote:

On Wed, Apr 17, 2019 at 12:26 AM Alexandre Ghiti  wrote:

Do not offset mmap base address because of stack randomization if
current task does not want randomization.

Maybe mention that this makes this logic match the existing x86 behavior too?



Ok I will add this in case of a v4.





Signed-off-by: Alexandre Ghiti 

Acked-by: Kees Cook 


Thanks !




-Kees


---
  arch/arm64/mm/mmap.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index ed4f9915f2b8..ac89686c4af8 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -65,7 +65,11 @@ unsigned long arch_mmap_rnd(void)
  static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
  {
 unsigned long gap = rlim_stack->rlim_cur;
-   unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
+   unsigned long pad = stack_guard_gap;
+
+   /* Account for stack randomization if necessary */
+   if (current->flags & PF_RANDOMIZE)
+   pad += (STACK_RND_MASK << PAGE_SHIFT);

 /* Values close to RLIM_INFINITY can overflow. */
 if (gap + pad > gap)
--
2.20.1





Re: [PATCH v3 02/11] arm64: Make use of is_compat_task instead of hardcoding this test

2019-04-17 Thread Alex Ghiti

On 4/18/19 12:32 AM, Kees Cook wrote:

On Wed, Apr 17, 2019 at 12:25 AM Alexandre Ghiti  wrote:

Each architecture has its own way to determine if a task is a compat task,
by using is_compat_task in arch_mmap_rnd, it allows more genericity and
then it prepares its moving to mm/.

Signed-off-by: Alexandre Ghiti 

Acked-by: Kees Cook 



Thanks !



-Kees


---
  arch/arm64/mm/mmap.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 842c8a5fcd53..ed4f9915f2b8 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -54,7 +54,7 @@ unsigned long arch_mmap_rnd(void)
 unsigned long rnd;

  #ifdef CONFIG_COMPAT
-   if (test_thread_flag(TIF_32BIT))
+   if (is_compat_task())
 rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
 else
  #endif
--
2.20.1





Re: [PATCH v3 01/11] mm, fs: Move randomize_stack_top from fs to mm

2019-04-17 Thread Kees Cook
On Wed, Apr 17, 2019 at 12:24 AM Alexandre Ghiti  wrote:
>
> This preparatory commit moves this function so that further introduction
> of generic topdown mmap layout is contained only in mm/util.c.
>
> Signed-off-by: Alexandre Ghiti 
> Reviewed-by: Christoph Hellwig 
> ---
>  fs/binfmt_elf.c| 20 
>  include/linux/mm.h |  2 ++
>  mm/util.c  | 22 ++
>  3 files changed, 24 insertions(+), 20 deletions(-)
>
> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index 7d09d125f148..045f3b29d264 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -662,26 +662,6 @@ static unsigned long load_elf_interp(struct elfhdr 
> *interp_elf_ex,
>   * libraries.  There is no binary dependent code anywhere else.
>   */
>
> -#ifndef STACK_RND_MASK
> -#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))/* 8MB of VA */
> -#endif
> -
> -static unsigned long randomize_stack_top(unsigned long stack_top)
> -{
> -   unsigned long random_variable = 0;
> -
> -   if (current->flags & PF_RANDOMIZE) {
> -   random_variable = get_random_long();
> -   random_variable &= STACK_RND_MASK;
> -   random_variable <<= PAGE_SHIFT;
> -   }
> -#ifdef CONFIG_STACK_GROWSUP
> -   return PAGE_ALIGN(stack_top) + random_variable;
> -#else
> -   return PAGE_ALIGN(stack_top) - random_variable;
> -#endif
> -}
> -
>  static int load_elf_binary(struct linux_binprm *bprm)
>  {
> struct file *interpreter = NULL; /* to shut gcc up */
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 76769749b5a5..087824a5059f 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2312,6 +2312,8 @@ extern int install_special_mapping(struct mm_struct *mm,
>unsigned long addr, unsigned long len,
>unsigned long flags, struct page **pages);
>
> +unsigned long randomize_stack_top(unsigned long stack_top);
> +
>  extern unsigned long get_unmapped_area(struct file *, unsigned long, 
> unsigned long, unsigned long, unsigned long);
>
>  extern unsigned long mmap_region(struct file *file, unsigned long addr,
> diff --git a/mm/util.c b/mm/util.c
> index d559bde497a9..a54afb9b4faa 100644
> --- a/mm/util.c
> +++ b/mm/util.c
> @@ -14,6 +14,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>
>  #include 
>
> @@ -291,6 +293,26 @@ int vma_is_stack_for_current(struct vm_area_struct *vma)
> return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
>  }
>
> +#ifndef STACK_RND_MASK
> +#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
> +#endif

Oh right, here's the generic one... this should probably just copy
arm64's version instead. Then x86 can be tweaked (it uses
mmap_is_ia32() instead of is_compat_task() by default, but has a weird
override..)

Regardless, yes, this is a direct code move:

Acked-by: Kees Cook 

-Kees

> +
> +unsigned long randomize_stack_top(unsigned long stack_top)
> +{
> +   unsigned long random_variable = 0;
> +
> +   if (current->flags & PF_RANDOMIZE) {
> +   random_variable = get_random_long();
> +   random_variable &= STACK_RND_MASK;
> +   random_variable <<= PAGE_SHIFT;
> +   }
> +#ifdef CONFIG_STACK_GROWSUP
> +   return PAGE_ALIGN(stack_top) + random_variable;
> +#else
> +   return PAGE_ALIGN(stack_top) - random_variable;
> +#endif
> +}
> +
>  #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
>  void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
>  {
> --
> 2.20.1
>


-- 
Kees Cook


Re: [PATCH v4 01/26] ALSA: line6: Avoid polluting led_* namespace

2019-04-17 Thread Takashi Iwai
On Wed, 17 Apr 2019 22:54:14 +0200,
Jacek Anaszewski wrote:
> 
> led_colors clashes with the array of the same name being added
> to the LED class. Do the following amendments to fix this issue
> and the other prospective one.
> 
> led_colors -> toneport_led_colors
> led_init_vals -> toneport_led_init_vals
> 
> Fixes: f44edd7b2bbed ("ALSA: line6/toneport: Implement LED controls via LED 
> class")
> Signed-off-by: Jacek Anaszewski 
> Cc: Jaroslav Kysela 
> Cc: Takashi Iwai 
> Cc: Arnd Bergmann 

Looks good.

Feel free to take my ack:
  Reviewed-by: Takashi Iwai 


thanks,

Takashi

> ---
>  sound/usb/line6/toneport.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/sound/usb/line6/toneport.c b/sound/usb/line6/toneport.c
> index f47ba94e6f4a..56875526b182 100644
> --- a/sound/usb/line6/toneport.c
> +++ b/sound/usb/line6/toneport.c
> @@ -291,8 +291,8 @@ static bool toneport_has_led(struct usb_line6_toneport 
> *toneport)
>   }
>  }
>  
> -static const char * const led_colors[2] = { "red", "green" };
> -static const int led_init_vals[2] = { 0x00, 0x26 };
> +static const char * const toneport_led_colors[2] = { "red", "green" };
> +static const int toneport_led_init_vals[2] = { 0x00, 0x26 };
>  
>  static void toneport_update_led(struct usb_line6_toneport *toneport)
>  {
> @@ -320,9 +320,9 @@ static int toneport_init_leds(struct usb_line6_toneport 
> *toneport)
>  
>   led->toneport = toneport;
>   snprintf(led->name, sizeof(led->name), "%s::%s",
> -  dev_name(dev), led_colors[i]);
> +  dev_name(dev), toneport_led_colors[i]);
>   leddev->name = led->name;
> - leddev->brightness = led_init_vals[i];
> + leddev->brightness = toneport_led_init_vals[i];
>   leddev->max_brightness = 0x26;
>   leddev->brightness_set = toneport_led_brightness_set;
>   err = led_classdev_register(dev, leddev);
> -- 
> 2.11.0
> 
> 


Re: [PATCH v3] serial: Add Milbeaut serial control

2019-04-17 Thread Greg Kroah-Hartman
On Thu, Apr 18, 2019 at 11:51:56AM +0900, Sugaya Taichi wrote:
> Add Milbeaut serial control including earlycon and console.
> 
> Signed-off-by: Sugaya Taichi 
> ---
> Changes from v2:
>  - Fix build warning.

No, I only need an incremental patch fixing the one sparse warning
found, not a whole new patch as I have already merged your original
patch, right?

thanks,

greg k-h


Re: [ALSA] feb689025f: WARNING:possible_recursive_locking_detected

2019-04-17 Thread Takashi Iwai
On Thu, 18 Apr 2019 02:22:57 +0200,
kernel test robot wrote:
> 
> 
> FYI, we noticed the following commit (built with gcc-5):
> 
> commit: feb689025fbb6f0aa6297d3ddf97de945ea4ad32 ("ALSA: seq: Protect 
> in-kernel ioctl calls with mutex")
> https://git.kernel.org/cgit/linux/kernel/git/tiwai/sound.git for-next

FYI, the patch was already reverted in the later commit and fixed
differently.  Sorry for the inconvenience.


Takashi


Re: [PATCH] selftests/bpf: fix compile errors with older glibc

2019-04-17 Thread Yonghong Song


On 4/17/19 10:48 AM, Wang YanQing wrote:
> The older glibc (for example, 2.23) doesn't handle __UAPI_DEF_*
> in libc-compat.h properly, and it bring below compile errors:

I have an even old glibc 2.17 and it still works. Not sure
why it failed here. Could you explain more?

But I applied the change, it still works with 2.17 glibc and gcc 4.8.5.
So change probably safe.

> "
> In file included from test_tcpnotify_kern.c:12:
> /usr/include/netinet/in.h:101:5: error: expected identifier
>  IPPROTO_HOPOPTS = 0,   /* IPv6 Hop-by-Hop options.  */
>  ^
> /usr/include/linux/in6.h:131:26: note: expanded from macro 'IPPROTO_HOPOPTS'
>  ^
> In file included from test_tcpnotify_kern.c:12:
> /usr/include/netinet/in.h:103:5: error: expected identifier
>  IPPROTO_ROUTING = 43,  /* IPv6 routing header.  */
>  ^
> /usr/include/linux/in6.h:132:26: note: expanded from macro 'IPPROTO_ROUTING'
>  ^
> In file included from test_tcpnotify_kern.c:12:
> /usr/include/netinet/in.h:105:5: error: expected identifier
>  IPPROTO_FRAGMENT = 44, /* IPv6 fragmentation header.  */
>  ^
> /usr/include/linux/in6.h:133:26: note: expanded from macro 'IPPROTO_FRAGMENT'
> "
> The same compile errors are reported for test_tcpbpf_kern.c too.
> 
> This patch fixes the compile errors by moving  to before the
> .
> 
> Signed-off-by: Wang YanQing 
> ---
>   tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c| 2 +-
>   tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c | 2 +-
>   2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c 
> b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> index 74f73b3..c7c3240 100644
> --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> @@ -1,6 +1,7 @@
>   // SPDX-License-Identifier: GPL-2.0
>   #include 
>   #include 
> +#include 
>   #include 
>   #include 
>   #include 
> @@ -9,7 +10,6 @@
>   #include 
>   #include 
>   #include 
> -#include 
>   #include "bpf_helpers.h"
>   #include "bpf_endian.h"
>   #include "test_tcpbpf.h"
> diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c 
> b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
> index edbca20..ec6db6e 100644
> --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
> +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
> @@ -1,6 +1,7 @@
>   // SPDX-License-Identifier: GPL-2.0
>   #include 
>   #include 
> +#include 
>   #include 
>   #include 
>   #include 
> @@ -9,7 +10,6 @@
>   #include 
>   #include 
>   #include 
> -#include 
>   #include "bpf_helpers.h"
>   #include "bpf_endian.h"
>   #include "test_tcpnotify.h"
> 


Re: [PATCH v2 3/7] phy: qcom: Add Qualcomm PCIe2 PHY driver

2019-04-17 Thread Vinod Koul
On 18-02-19, 22:04, Bjorn Andersson wrote:

> +static int qcom_pcie2_phy_power_on(struct phy *phy)
> +{
> + struct qcom_phy *qphy = phy_get_drvdata(phy);
> + int ret;
> + u32 val;
> +
> + /* Program REF_CLK source */
> + val = readl(qphy->base + PCIE20_PARF_PHY_REFCLK_CTRL2);
> + val &= ~BIT(1);
> + writel(val, qphy->base + PCIE20_PARF_PHY_REFCLK_CTRL2);
> +
> + usleep_range(1000, 2000);
> +
> + /* Don't use PAD for refclock */
> + val = readl(qphy->base + PCIE20_PARF_PHY_REFCLK_CTRL2);
> + val &= ~BIT(0);
> + writel(val, qphy->base + PCIE20_PARF_PHY_REFCLK_CTRL2);
> +
> + /* Program SSP ENABLE */
> + val = readl(qphy->base + PCIE20_PARF_PHY_REFCLK_CTRL3);
> + val |= BIT(0);
> + writel(val, qphy->base + PCIE20_PARF_PHY_REFCLK_CTRL3);

we have this readl, modify and writel pattern in the file. I guess it
makes sense to add a modifyl() with mask and value as args..

-- 
~Vinod


Re: [PATCH 5.0 072/246] drm/amd/display: Fix reference counting for struct dc_sink.

2019-04-17 Thread Mathias Fröhlich
Hi Greg,

On Monday, 15 April 2019 12:50:29 CEST Greg Kroah-Hartman wrote:
> On Fri, Apr 05, 2019 at 07:12:47AM +0200, Mathias Fröhlich wrote:
> > Greg,
> > 
> > as I mentioned in the commit message, I saw more fixes to that area in Alex
> > Deuchers queue when I fed that to Alex. There is one fix that I can think of
> > that interacts with my fixes. Means, we may get unwanted side effects of my
> > patch without the fix mentioned below. With that below patch also selected,
> > I think we should be ok for stable.
> > Alex, AMD people, your opinion?
> > 
> > The one that I can spot not already in linux-5.0.y is:
> > 
> > commit 3f01f098a4e2ef30ef628497c43a3d568e720376
> > Author: Jerry (Fangzhi) Zuo 
> > Date:   Thu Jan 24 11:46:49 2019 -0500
> > 
> > drm/amd/display: Clear dc_sink after it gets released
> > 
> > [Why]
> > The dc_sink was released but the pointer on the aconnector was
> > not cleared.
> > 
> > [How]
> > Clear it.
> 
> This patch does not apply to the 5.0.y tree, so I can not queue it up :(

Sounds plausible.
And thanks for trying!

I had now the chance to test the 5.0.7 kernel tree as is including just
my patch on the box in question. I can say that it works as expected with 5.0.7
and since I did also not hear complaints on the lists I hope we are good as is!

Many thanks!

best

Mathias




Re: [PATCH 1/2] mtd: nand: Kconfig: correct the MTD_NAND_ECC_SW_BCH select

2019-04-17 Thread Anders Roxell
On Tue, 16 Apr 2019 at 23:53, Miquel Raynal  wrote:
>
> Hi Anders,
>
> Anders Roxell  wrote on Wed, 10 Apr 2019
> 21:58:51 +0200:
>
> > Config fragments should not have the prefix 'CONFIG_'.
> >
> > Rework to remove the prefix 'CONFIG_' from 'CONFIG_MTD_NAND_ECC_SW_BCH'.
> >
> > Fixes: 51ef1d0b2095 ("mtd: nand: Clarify Kconfig entry for software BCH ECC 
> > algorithm")
> > Signed-off-by: Anders Roxell 
> > ---
>
> Thanks for both patches, do you mind if I squash these with the faulty
> commits which are still in nand/next?

That is fine by me.

Cheers,
Anders


[PATCH v5 02/02] tty: rocket: deprecate the rp_ioctl

2019-04-17 Thread Fuqian Huang
This patch depends on patch 01.

The rp_ioctl is deprecated.
Add dev_warn_ratelimited to warn the use of rp_ioctl.

Signed-off-by: Fuqian Huang 
---
 drivers/tty/rocket.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index b6543e28bd8b..1e1bcb54b680 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -1284,18 +1284,28 @@ static int rp_ioctl(struct tty_struct *tty,
 
switch (cmd) {
case RCKP_GET_CONFIG:
+   dev_warn_ratelimited(tty->dev,
+   "RCKP_GET_CONFIG option is 
deprecated\n");
ret = get_config(info, argp);
break;
case RCKP_SET_CONFIG:
+   dev_warn_ratelimited(tty->dev,
+   "RCKP_SET_CONFIG option is 
deprecated\n");
ret = set_config(tty, info, argp);
break;
case RCKP_GET_PORTS:
+   dev_warn_ratelimited(tty->dev,
+   "RCKP_GET_PORTS option is 
deprecated\n");
ret = get_ports(info, argp);
break;
case RCKP_RESET_RM2:
+   dev_warn_ratelimited(tty->dev,
+   "RCKP_RESET_RM2 option is 
deprecated\n");
ret = reset_rm2(info, argp);
break;
case RCKP_GET_VERSION:
+   dev_warn_ratelimited(tty->dev,
+   "RCKP_GET_VERSION option is 
deprecated\n");
ret = get_version(info, argp);
break;
default:
-- 
2.11.0



Re: [PATCH v3 03/11] arm64: Consider stack randomization for mmap base only when necessary

2019-04-17 Thread Kees Cook
On Wed, Apr 17, 2019 at 12:26 AM Alexandre Ghiti  wrote:
>
> Do not offset mmap base address because of stack randomization if
> current task does not want randomization.

Maybe mention that this makes this logic match the existing x86 behavior too?

> Signed-off-by: Alexandre Ghiti 

Acked-by: Kees Cook 

-Kees

> ---
>  arch/arm64/mm/mmap.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
> index ed4f9915f2b8..ac89686c4af8 100644
> --- a/arch/arm64/mm/mmap.c
> +++ b/arch/arm64/mm/mmap.c
> @@ -65,7 +65,11 @@ unsigned long arch_mmap_rnd(void)
>  static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
>  {
> unsigned long gap = rlim_stack->rlim_cur;
> -   unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
> +   unsigned long pad = stack_guard_gap;
> +
> +   /* Account for stack randomization if necessary */
> +   if (current->flags & PF_RANDOMIZE)
> +   pad += (STACK_RND_MASK << PAGE_SHIFT);
>
> /* Values close to RLIM_INFINITY can overflow. */
> if (gap + pad > gap)
> --
> 2.20.1
>


-- 
Kees Cook


[PATCH v5 01/02] tty: rocket: Remove RCPK_GET_STRUCT ioctl

2019-04-17 Thread Fuqian Huang
If the cmd is RCPK_GET_STRUCT, copy_to_user will copy
info to user space. As info->port.ops is the address of
a constant object rocket_port_ops (assigned in init_r_port),
a kernel address leakage happens.

Remove the RCPK_GET_STRUCT ioctl.

Signed-off-by: Fuqian Huang 
---
 drivers/tty/rocket.c | 4 
 drivers/tty/rocket.h | 1 -
 2 files changed, 5 deletions(-)

diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index b121d8f8f3d7..b6543e28bd8b 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -1283,10 +1283,6 @@ static int rp_ioctl(struct tty_struct *tty,
return -ENXIO;
 
switch (cmd) {
-   case RCKP_GET_STRUCT:
-   if (copy_to_user(argp, info, sizeof (struct r_port)))
-   ret = -EFAULT;
-   break;
case RCKP_GET_CONFIG:
ret = get_config(info, argp);
break;
diff --git a/drivers/tty/rocket.h b/drivers/tty/rocket.h
index d0560203f215..d62ed6587f32 100644
--- a/drivers/tty/rocket.h
+++ b/drivers/tty/rocket.h
@@ -71,7 +71,6 @@ struct rocket_version {
 /*
  * Rocketport ioctls -- "RP"
  */
-#define RCKP_GET_STRUCT0x00525001
 #define RCKP_GET_CONFIG0x00525002
 #define RCKP_SET_CONFIG0x00525003
 #define RCKP_GET_PORTS 0x00525004
-- 
2.11.0



Re: [PATCH v3 02/11] arm64: Make use of is_compat_task instead of hardcoding this test

2019-04-17 Thread Kees Cook
On Wed, Apr 17, 2019 at 12:25 AM Alexandre Ghiti  wrote:
>
> Each architecture has its own way to determine if a task is a compat task,
> by using is_compat_task in arch_mmap_rnd, it allows more genericity and
> then it prepares its moving to mm/.
>
> Signed-off-by: Alexandre Ghiti 

Acked-by: Kees Cook 

-Kees

> ---
>  arch/arm64/mm/mmap.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
> index 842c8a5fcd53..ed4f9915f2b8 100644
> --- a/arch/arm64/mm/mmap.c
> +++ b/arch/arm64/mm/mmap.c
> @@ -54,7 +54,7 @@ unsigned long arch_mmap_rnd(void)
> unsigned long rnd;
>
>  #ifdef CONFIG_COMPAT
> -   if (test_thread_flag(TIF_32BIT))
> +   if (is_compat_task())
> rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
> else
>  #endif
> --
> 2.20.1
>


-- 
Kees Cook


[PATCH] arm64: mm: Ensure tail of unaligned initrd is reserved

2019-04-17 Thread Bjorn Andersson
In the event that the start address of the initrd is not aligned, but
has an aligned size, the base + size will not cover the entire initrd
image and there is a chance that the kernel will corrupt the tail of the
image.

By aligning the end of the initrd to a page boundary and then
subtracting the adjusted start address the memblock reservation will
cover all pages that contains the initrd.

Fixes: c756c592e442 ("arm64: Utilize phys_initrd_start/phys_initrd_size")
Cc: sta...@vger.kernel.org
Signed-off-by: Bjorn Andersson 
---
 arch/arm64/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 6bc135042f5e..7cae155e81a5 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -363,7 +363,7 @@ void __init arm64_memblock_init(void)
 * Otherwise, this is a no-op
 */
u64 base = phys_initrd_start & PAGE_MASK;
-   u64 size = PAGE_ALIGN(phys_initrd_size);
+   u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - 
base;
 
/*
 * We can only add back the initrd memory if we don't end up
-- 
2.18.0



[QUESTIONS] THP allocation in NUMA fault migration path

2019-04-17 Thread Yang Shi

Hi folks,


I noticed that there might be new THP allocation in NUMA fault migration 
path (migrate_misplaced_transhuge_page()) even when THP is disabled (set 
to "never"). When THP is set to "never", there should be not any new THP 
allocation, but the migration path is kind of special. So I'm not quite 
sure if this is the expected behavior or not?



And, it looks this allocation disregards defrag setting too, is this 
expected behavior too?



Thanks,

Yang



linux-next: manual merge of the tip tree with the printk tree

2019-04-17 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the tip tree got a conflict in:

  arch/x86/kernel/irq_64.c

between commit:

  d75f773c86a2 ("treewide: Switch printk users from %pf and %pF to %ps and %pS, 
respectively")

from the printk tree and commit:

  117ed4548541 ("x86/irq/64: Remove stack overflow debug code")

from the tip tree.

I fixed it up (the latter removed taht code updated by the former) and
can carry the fix as necessary. This is now fixed as far as linux-next
is concerned, but any non trivial conflicts should be mentioned to your
upstream maintainer when your tree is submitted for merging.  You may
also want to consider cooperating with the maintainer of the conflicting
tree to minimise any particularly complex conflicts.



-- 
Cheers,
Stephen Rothwell


pgpQfko5NwMzJ.pgp
Description: OpenPGP digital signature


Re: [PATCH v2 3/3] thermal: cpu_cooling: Migrate to using the EM framework

2019-04-17 Thread Viresh Kumar
On 17-04-19, 10:43, Quentin Perret wrote:
>  static struct thermal_cooling_device *
>  __cpufreq_cooling_register(struct device_node *np,
> - struct cpufreq_policy *policy, u32 capacitance)
> + struct cpufreq_policy *policy,
> + struct em_perf_domain *em)
>  {

> + if (em_is_sane(cpufreq_cdev, em)) {
> + cpufreq_cdev->em = em;
>   cooling_ops = _power_cooling_ops;
> - } else {
> + } else if (policy->freq_table_sorted != CPUFREQ_TABLE_UNSORTED) {
>   cooling_ops = _cooling_ops;
> + } else {
> + WARN(1, "cpu_cooling: no valid frequency table found\n");

Well the frequency table is valid, isn't it ? Maybe something like: "cpu_cooling
doesn't support unsorted frequency tables" ?

> + cdev = ERR_PTR(-EINVAL);
> + goto remove_ida;
>   }
>  
>   cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
> @@ -691,7 +619,7 @@ __cpufreq_cooling_register(struct device_node *np,
>   if (IS_ERR(cdev))
>   goto remove_ida;
>  
> - cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency;
> + cpufreq_cdev->clipped_freq = get_state_freq(cpufreq_cdev, 0);
>   cpufreq_cdev->cdev = cdev;
>  
>   mutex_lock(_list_lock);
> @@ -708,8 +636,6 @@ __cpufreq_cooling_register(struct device_node *np,
>  
>  remove_ida:
>   ida_simple_remove(_ida, cpufreq_cdev->id);
> -free_table:
> - kfree(cpufreq_cdev->freq_table);
>  free_idle_time:
>   kfree(cpufreq_cdev->idle_time);
>  free_cdev:
> @@ -731,7 +657,7 @@ __cpufreq_cooling_register(struct device_node *np,
>  struct thermal_cooling_device *
>  cpufreq_cooling_register(struct cpufreq_policy *policy)
>  {
> - return __cpufreq_cooling_register(NULL, policy, 0);
> + return __cpufreq_cooling_register(NULL, policy, NULL);
>  }
>  EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
>  
> @@ -759,7 +685,6 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy)
>  {
>   struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
>   struct thermal_cooling_device *cdev = NULL;
> - u32 capacitance = 0;
>  
>   if (!np) {
>   pr_err("cpu_cooling: OF node not available for cpu%d\n",
> @@ -768,10 +693,9 @@ of_cpufreq_cooling_register(struct cpufreq_policy 
> *policy)
>   }
>  
>   if (of_find_property(np, "#cooling-cells", NULL)) {
> - of_property_read_u32(np, "dynamic-power-coefficient",
> -  );
> + struct em_perf_domain *em = em_cpu_get(policy->cpu);
>  
> - cdev = __cpufreq_cooling_register(np, policy, capacitance);
> + cdev = __cpufreq_cooling_register(np, policy, em);
>   if (IS_ERR(cdev)) {
>   pr_err("cpu_cooling: cpu%d failed to register as 
> cooling device: %ld\n",
>  policy->cpu, PTR_ERR(cdev));
> @@ -813,7 +737,6 @@ void cpufreq_cooling_unregister(struct 
> thermal_cooling_device *cdev)
>   thermal_cooling_device_unregister(cpufreq_cdev->cdev);
>   ida_simple_remove(_ida, cpufreq_cdev->id);
>   kfree(cpufreq_cdev->idle_time);
> - kfree(cpufreq_cdev->freq_table);
>   kfree(cpufreq_cdev);
>  }
>  EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
> -- 
> 2.21.0

-- 
viresh


[PATCH 3/3] RAS/CEC: immediate soft-offline page when count_threshold == 1

2019-04-17 Thread WANG Chao
count_threshol == 1 isn't working as expected. CEC only does soft
offline the second time the same pfn is hit by a correctable error.

Signed-off-by: WANG Chao 
---
 drivers/ras/cec.c | 36 +---
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 702e4c02c713..ac879c45377c 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -272,7 +272,22 @@ static u64 __maybe_unused del_lru_elem(void)
return pfn;
 }
 
+static void cec_valid_soft_offline(u64 pfn)
+{
+   if (!pfn_valid(pfn)) {
+   pr_warn("CEC: Invalid pfn: 0x%llx\n", pfn);
+   } else {
+   /* We have reached max count for this page, soft-offline it. */
+   pr_err("Soft-offlining pfn: 0x%llx\n", pfn);
+   memory_failure_queue(pfn, MF_SOFT_OFFLINE, _chain);
+   ce_arr.pfns_poisoned++;
+   }
+}
 
+/*
+ * Return a >0 value to denote that we've reached the offlining
+ * threshold.
+ */
 int cec_add_elem(u64 pfn)
 {
struct ce_array *ca = _arr;
@@ -295,6 +310,11 @@ int cec_add_elem(u64 pfn)
 
ret = find_elem(ca, pfn, );
if (ret < 0) {
+   if (count_threshold == 1) {
+   cec_valid_soft_offline(pfn);
+   ret = 1;
+   goto unlock;
+   }
/*
 * Shift range [to-end] to make room for one more element.
 */
@@ -320,23 +340,9 @@ int cec_add_elem(u64 pfn)
 
ret = 0;
} else {
-   u64 pfn = ca->array[to] >> PAGE_SHIFT;
-
-   if (!pfn_valid(pfn)) {
-   pr_warn("CEC: Invalid pfn: 0x%llx\n", pfn);
-   } else {
-   /* We have reached max count for this page, 
soft-offline it. */
-   pr_err("Soft-offlining pfn: 0x%llx\n", pfn);
-   memory_failure_queue(pfn, MF_SOFT_OFFLINE);
-   ca->pfns_poisoned++;
-   }
-
+   cec_valid_soft_offline(pfn);
del_elem(ca, to);
 
-   /*
-* Return a >0 value to denote that we've reached the offlining
-* threshold.
-*/
ret = 1;
 
goto unlock;
-- 
2.21.0



[PATCH] arm64: dts: lx2160a: add cpu idle support

2019-04-17 Thread Ran Wang
lx2160a supports pw20 which could help save more power during cpu is
dile. It needs system firmware support via PSCI.

Signed-off-by: Ran Wang 
---
 arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi |   25 
 1 files changed, 25 insertions(+), 0 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi 
b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
index fe87204..1e07155 100644
--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
@@ -33,6 +33,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@1 {
@@ -48,6 +49,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@100 {
@@ -63,6 +65,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@101 {
@@ -78,6 +81,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@200 {
@@ -93,6 +97,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@201 {
@@ -108,6 +113,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@300 {
@@ -123,6 +129,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@301 {
@@ -138,6 +145,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@400 {
@@ -153,6 +161,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@401 {
@@ -168,6 +177,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@500 {
@@ -183,6 +193,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@501 {
@@ -198,6 +209,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@600 {
@@ -213,6 +225,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@601 {
@@ -228,6 +241,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@700 {
@@ -243,6 +257,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cpu@701 {
@@ -258,6 +273,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <_l2>;
+   cpu-idle-states = <_pw20>;
};
 
cluster0_l2: l2-cache0 {
@@ -323,6 +339,15 @@
cache-sets = <1024>;
cache-level = <2>;
};
+
+   cpu_pw20: cpu-pw20 {
+   compatible = "arm,idle-state";
+   

[PATCH 2/3] RAS/CEC: make ces_entered smp safe

2019-04-17 Thread WANG Chao
ces_entered should be put in a critical section to avoid race condition.

Signed-off-by: WANG Chao 
---
 drivers/ras/cec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 2e0bf1269c31..702e4c02c713 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -286,10 +286,10 @@ int cec_add_elem(u64 pfn)
if (!ce_arr.array || ce_arr.disabled)
return -ENODEV;
 
-   ca->ces_entered++;
-
mutex_lock(_mutex);
 
+   ca->ces_entered++;
+
if (ca->n == MAX_ELEMS)
WARN_ON(!del_lru_elem_unlocked(ca));
 
-- 
2.21.0



[PATCH 1/3] RAS/CEC: fix __find_elem

2019-04-17 Thread WANG Chao
A left over pfn (because we don't clear) at ca->array[n] can be a match
in __find_elem. Later it'd cause a memmove size overflow in del_elem.

Signed-off-by: WANG Chao 
---
 drivers/ras/cec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 2d9ec378a8bc..2e0bf1269c31 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -206,7 +206,7 @@ static int __find_elem(struct ce_array *ca, u64 pfn, 
unsigned int *to)
 
this_pfn = PFN(ca->array[min]);
 
-   if (this_pfn == pfn)
+   if (this_pfn == pfn && ca->n > min)
return min;
 
return -ENOKEY;
-- 
2.21.0



Re: [PATCH] ceph: Fix a memory leak in ci->i_head_snapc

2019-04-17 Thread Yan, Zheng
On Tue, Apr 16, 2019 at 9:30 PM Luis Henriques  wrote:
>
> Luis Henriques  writes:
>
> > "Yan, Zheng"  writes:
> >
> >> On Fri, Mar 22, 2019 at 6:04 PM Luis Henriques  wrote:
> >>>
> >>> Luis Henriques  writes:
> >>>
> >>> > "Yan, Zheng"  writes:
> >>> >
> >>> >> On Tue, Mar 19, 2019 at 12:22 AM Luis Henriques  
> >>> >> wrote:
> >>> >>>
> >>> >>> "Yan, Zheng"  writes:
> >>> >>>
> >>> >>> > On Mon, Mar 18, 2019 at 6:33 PM Luis Henriques 
> >>> >>> >  wrote:
> >>> >>> >>
> >>> >>> >> "Yan, Zheng"  writes:
> >>> >>> >>
> >>> >>> >> > On Fri, Mar 15, 2019 at 7:13 PM Luis Henriques 
> >>> >>> >> >  wrote:
> >>> >>> >> >>
> >>> >>> >> >> I'm occasionally seeing a kmemleak warning in xfstest 
> >>> >>> >> >> generic/013:
> >>> >>> >> >>
> >>> >>> >> >> unreferenced object 0x8881fccca940 (size 32):
> >>> >>> >> >>   comm "kworker/0:1", pid 12, jiffies 4295005883 (age 130.648s)
> >>> >>> >> >>   hex dump (first 32 bytes):
> >>> >>> >> >> 01 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00  
> >>> >>> >> >> 
> >>> >>> >> >> 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
> >>> >>> >> >> 
> >>> >>> >> >>   backtrace:
> >>> >>> >> >> [] build_snap_context+0x5b/0x2a0
> >>> >>> >> >> [<21a00533>] rebuild_snap_realms+0x27/0x90
> >>> >>> >> >> [] rebuild_snap_realms+0x42/0x90
> >>> >>> >> >> [<0e955fac>] ceph_update_snap_trace+0x2ee/0x610
> >>> >>> >> >> [] ceph_handle_snap+0x317/0x5f3
> >>> >>> >> >> [] dispatch+0x362/0x176c
> >>> >>> >> >> [] ceph_con_workfn+0x9ce/0x2cf0
> >>> >>> >> >> [<4168e3a9>] process_one_work+0x1d4/0x400
> >>> >>> >> >> [<2188e9e7>] worker_thread+0x2d/0x3c0
> >>> >>> >> >> [] kthread+0x112/0x130
> >>> >>> >> >> [] ret_from_fork+0x35/0x40
> >>> >>> >> >> [] 0x
> >>> >>> >> >>
> >>> >>> >> >> It looks like it is possible that we miss a flush_ack from the 
> >>> >>> >> >> MDS when,
> >>> >>> >> >> for example, umounting the filesystem.  In that case, we can 
> >>> >>> >> >> simply drop
> >>> >>> >> >> the reference to the ceph_snap_context obtained in 
> >>> >>> >> >> ceph_queue_cap_snap().
> >>> >>> >> >>
> >>> >>> >> >> Link: https://tracker.ceph.com/issues/38224
> >>> >>> >> >> Cc: sta...@vger.kernel.org
> >>> >>> >> >> Signed-off-by: Luis Henriques 
> >>> >>> >> >> ---
> >>> >>> >> >>  fs/ceph/caps.c | 7 +++
> >>> >>> >> >>  1 file changed, 7 insertions(+)
> >>> >>> >> >>
> >>> >>> >> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> >>> >>> >> >> index 36a8dc699448..208f4dc6f574 100644
> >>> >>> >> >> --- a/fs/ceph/caps.c
> >>> >>> >> >> +++ b/fs/ceph/caps.c
> >>> >>> >> >> @@ -1054,6 +1054,7 @@ int ceph_is_any_caps(struct inode *inode)
> >>> >>> >> >>  static void drop_inode_snap_realm(struct ceph_inode_info *ci)
> >>> >>> >> >>  {
> >>> >>> >> >> struct ceph_snap_realm *realm = ci->i_snap_realm;
> >>> >>> >> >> +
> >>> >>> >> >> spin_lock(>inodes_with_caps_lock);
> >>> >>> >> >> list_del_init(>i_snap_realm_item);
> >>> >>> >> >> ci->i_snap_realm_counter++;
> >>> >>> >> >> @@ -1063,6 +1064,12 @@ static void drop_inode_snap_realm(struct 
> >>> >>> >> >> ceph_inode_info *ci)
> >>> >>> >> >> spin_unlock(>inodes_with_caps_lock);
> >>> >>> >> >> 
> >>> >>> >> >> ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
> >>> >>> >> >> realm);
> >>> >>> >> >> +   /*
> >>> >>> >> >> +* ci->i_head_snapc should be NULL, but we may still be 
> >>> >>> >> >> waiting for a
> >>> >>> >> >> +* flush_ack from the MDS.  In that case, we still hold 
> >>> >>> >> >> a ref for the
> >>> >>> >> >> +* ceph_snap_context and we need to drop it.
> >>> >>> >> >> +*/
> >>> >>> >> >> +   ceph_put_snap_context(ci->i_head_snapc);
> >>> >>> >> >>  }
> >>> >>> >> >>
> >>> >>> >> >>  /*
> >>> >>> >> >
> >>> >>> >> > This does not seem right.  i_head_snapc is cleared when
> >>> >>> >> > (ci->i_wrbuffer_ref_head == 0 && ci->i_dirty_caps == 0 &&
> >>> >>> >> > ci->i_flushing_caps == 0) . Nothing do with dropping 
> >>> >>> >> > ci->i_snap_realm.
> >>> >>> >> > Did you see 'reconnect denied' during the test? If you did, the 
> >>> >>> >> > fix
> >>> >>> >> > should be in iterate_session_caps()
> >>> >>> >> >
> >>> >>> >>
> >>> >>> >> No, I didn't saw any 'reconnect denied' in the test.  The test 
> >>> >>> >> actually
> >>> >>> >> seems to execute fine, except from the memory leak.
> >>> >>> >>
> >>> >>> >> It's very difficult to reproduce this issue, but last time I 
> >>> >>> >> managed to
> >>> >>> >> get this memory leak to trigger I actually had some debugging code 
> >>> >>> >> in
> >>> >>> >> drop_inode_snap_realm, something like:
> >>> >>> >>
> >>> >>> >>   if (ci->i_head_snapc)
> >>> >>> >> 

RE: [EXT] Re: [PATCH v5] arm64: dts: ls1088a: add one more thermal zone node

2019-04-17 Thread Andy Tang

> -Original Message-
> From: Daniel Lezcano 
> Sent: 2019年4月12日 20:19
> To: Andy Tang ; shawn...@kernel.org
> Cc: Leo Li ; robh...@kernel.org; mark.rutl...@arm.com;
> linux-arm-ker...@lists.infradead.org; devicet...@vger.kernel.org;
> linux-kernel@vger.kernel.org; linux...@vger.kernel.org; rui.zh...@intel.com;
> edubez...@gmail.com
> Subject: Re: [EXT] Re: [PATCH v5] arm64: dts: ls1088a: add one more thermal
> zone node
> 
> WARNING: This email was created outside of NXP. DO NOT CLICK links or
> attachments unless you recognize the sender and know the content is safe.
> 
> 
> 
> On 12/04/2019 09:47, Andy Tang wrote:
> >
> >> -Original Message-
> >> From: Daniel Lezcano 
> >> Sent: 2019年4月12日 3:15
> >> To: Andy Tang ; shawn...@kernel.org
> >> Cc: Leo Li ; robh...@kernel.org;
> >> mark.rutl...@arm.com; linux-arm-ker...@lists.infradead.org;
> >> devicet...@vger.kernel.org; linux-kernel@vger.kernel.org;
> >> linux...@vger.kernel.org; rui.zh...@intel.com; edubez...@gmail.com
> >> Subject: [EXT] Re: [PATCH v5] arm64: dts: ls1088a: add one more
> >> thermal zone node
> >>
> >> WARNING: This email was created outside of NXP. DO NOT CLICK links or
> >> attachments unless you recognize the sender and know the content is safe.
> >>
> >>
> >>
> >> On 11/04/2019 10:32, Yuantian Tang wrote:
> >>> Ls1088a has 2 thermal sensors, core cluster and SoC platform. Core
> >>> cluster sensor is used to monitor the temperature of core and SoC
> >>> platform is for platform. The current dts only support the first sensor.
> >>> This patch adds the second sensor node to dts to enable it.
> >>>
> >>> Signed-off-by: Yuantian Tang 
> >>> ---
> >>> v5:
> >>>   - update the thermal zone name due to the length limitation
> >>>   - remove cooling map in platform zone
> >>> v4:
> >>>   - use hyphen instead of underscore in node name
> >>> v3:
> >>>   - use more descriptive name for each zone
> >>> v2:
> >>>   - Add more information about sensors to description
> >>>  arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi |   28
> >> ---
> >>>  1 files changed, 24 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
> >>> b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
> >>> index de93b42..de39672 100644
> >>> --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
> >>> +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
> >>> @@ -129,19 +129,19 @@
> >>>   };
> >>>
> >>>   thermal-zones {
> >>> - cpu_thermal: cpu-thermal {
> >>> + core-cluster {
> >>>   polling-delay-passive = <1000>;
> >>>   polling-delay = <5000>;
> >>>   thermal-sensors = < 0>;
> >>>
> >>>   trips {
> >>> - cpu_alert: cpu-alert {
> >>> + core_cluster_alert: core-cluster-alert
> >>> + {
> >>>   temperature = <85000>;
> >>>   hysteresis = <2000>;
> >>>   type = "passive";
> >>>   };
> >>>
> >>> - cpu_crit: cpu-crit {
> >>> + core_cluster_crit: core-cluster-crit {
> >>>   temperature = <95000>;
> >>>   hysteresis = <2000>;
> >>>   type = "critical"; @@ -150,7
> >>> +150,7 @@
> >>>
> >>>   cooling-maps {
> >>>   map0 {
> >>> - trip = <_alert>;
> >>> + trip = <_cluster_alert>;
> >>>   cooling-device =
> >>>   <
> >> THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> >>>   <
> >> THERMAL_NO_LIMIT
> >>> THERMAL_NO_LIMIT>, @@ -163,6 +163,26 @@
> >>>   };
> >>>   };
> >>>   };
> >>> +
> >>> + platform {
> >>> + polling-delay-passive = <1000>;
> >>> + polling-delay = <5000>;
> >>> + thermal-sensors = < 1>;
> >>> +
> >>> + trips {
> >>> + platform-alert {
> >>> + temperature = <85000>;
> >>> + hysteresis = <2000>;
> >>> + type = "passive";
> >>> + };
> >>> +
> >>> + platform-crit {
> >>> + temperature = <95000>;
> >>> + hysteresis = <2000>;
> >>> + type = "critical";
> >>> + };
> >>> + };
> >>> + 

Re: [PATCH 4/5] mm: rework non-root kmem_cache lifecycle management

2019-04-17 Thread Roman Gushchin
On Wed, Apr 17, 2019 at 06:55:12PM -0700, Shakeel Butt wrote:
> On Wed, Apr 17, 2019 at 5:39 PM Roman Gushchin  wrote:
> >
> > On Wed, Apr 17, 2019 at 04:41:01PM -0700, Shakeel Butt wrote:
> > > On Wed, Apr 17, 2019 at 2:55 PM Roman Gushchin  wrote:
> > > >
> > > > This commit makes several important changes in the lifecycle
> > > > of a non-root kmem_cache, which also affect the lifecycle
> > > > of a memory cgroup.
> > > >
> > > > Currently each charged slab page has a page->mem_cgroup pointer
> > > > to the memory cgroup and holds a reference to it.
> > > > Kmem_caches are held by the cgroup. On offlining empty kmem_caches
> > > > are freed, all other are freed on cgroup release.
> > >
> > > No, they are not freed (i.e. destroyed) on offlining, only
> > > deactivated. All memcg kmem_caches are freed/destroyed on memcg's
> > > css_free.
> >
> > You're right, my bad. I was thinking about the corresponding sysfs entry
> > when was writing it. We try to free it from the deactivation path too.
> >
> > >
> > > >
> > > > So the current scheme can be illustrated as:
> > > > page->mem_cgroup->kmem_cache.
> > > >
> > > > To implement the slab memory reparenting we need to invert the scheme
> > > > into: page->kmem_cache->mem_cgroup.
> > > >
> > > > Let's make every page to hold a reference to the kmem_cache (we
> > > > already have a stable pointer), and make kmem_caches to hold a single
> > > > reference to the memory cgroup.
> > >
> > > What about memcg_kmem_get_cache()? That function assumes that by
> > > taking reference on memcg, it's kmem_caches will stay. I think you
> > > need to get reference on the kmem_cache in memcg_kmem_get_cache()
> > > within the rcu lock where you get the memcg through css_tryget_online.
> >
> > Yeah, a very good question.
> >
> > I believe it's safe because css_tryget_online() guarantees that
> > the cgroup is online and won't go offline before css_free() in
> > slab_post_alloc_hook(). I do initialize kmem_cache's refcount to 1
> > and drop it on offlining, so it protects the online kmem_cache.
> >
> 
> Let's suppose a thread doing a remote charging calls
> memcg_kmem_get_cache() and gets an empty kmem_cache of the remote
> memcg having refcnt equal to 1. That thread got a reference on the
> remote memcg but no reference on the kmem_cache. Let's suppose that
> thread got stuck in the reclaim and scheduled away. In the meantime
> that remote memcg got offlined and decremented the refcnt of all of
> its kmem_caches. The empty kmem_cache which the thread stuck in
> reclaim have pointer to can get deleted and may be using an already
> destroyed kmem_cache after coming back from reclaim.
> 
> I think the above situation is possible unless the thread gets the
> reference on the kmem_cache in memcg_kmem_get_cache().

Yes, you're right and I'm writing a nonsense: css_tryget_online()
can't prevent the cgroup from being offlined.

So, the problem with getting a reference in memcg_kmem_get_cache()
is that it's an atomic operation on the hot path, something I'd like
to avoid.

I can make the refcounter percpu, but it'll add some complexity and size
to the kmem_cache object. Still an option, of course.

I wonder if we can use rcu_read_lock() instead, and bump the refcounter
only if we're going into reclaim.

What do you think?

Thanks!


iio: dummy_evgen: fix possible memleak in evgen init

2019-04-17 Thread Pan Bian
The memory allocated in the function iio_dummy_evgen_create is not
released if it fails to add the evgen device to device hierarchy. This
may result in a memory leak bug.

Signed-off-by: Pan Bian 
---
 drivers/iio/dummy/iio_dummy_evgen.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/dummy/iio_dummy_evgen.c 
b/drivers/iio/dummy/iio_dummy_evgen.c
index efd0005..f96c0a3 100644
--- a/drivers/iio/dummy/iio_dummy_evgen.c
+++ b/drivers/iio/dummy/iio_dummy_evgen.c
@@ -196,7 +196,10 @@ static __init int iio_dummy_evgen_init(void)
return ret;
device_initialize(_evgen_dev);
dev_set_name(_evgen_dev, "iio_evgen");
-   return device_add(_evgen_dev);
+   ret = device_add(_evgen_dev);
+   if (ret)
+   put_device(_evgen_dev);
+   return ret;
 }
 module_init(iio_dummy_evgen_init);
 
-- 
2.7.4




[PATCH v2 -next] ASoC: Intel: Haswell: Remove set but not used variable 'stage_type'

2019-04-17 Thread Yue Haibing
From: YueHaibing 

Fixes gcc '-Wunused-but-set-variable' warning:

sound/soc/intel/haswell/sst-haswell-ipc.c: In function 'hsw_stream_message':
sound/soc/intel/haswell/sst-haswell-ipc.c:669:29: warning: variable 
'stage_type' set but not used [-Wunused-but-set-variable]

It is never used since introduction in
commit ba57f68235cf ("ASoC: Intel: create haswell folder and move haswell 
platform files in")

Signed-off-by: YueHaibing 
---
v2: also remove the variable declaration and the static inline 
mst_get_stage_type
---
 sound/soc/intel/haswell/sst-haswell-ipc.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/sound/soc/intel/haswell/sst-haswell-ipc.c 
b/sound/soc/intel/haswell/sst-haswell-ipc.c
index 31fcdf12..74acf9c 100644
--- a/sound/soc/intel/haswell/sst-haswell-ipc.c
+++ b/sound/soc/intel/haswell/sst-haswell-ipc.c
@@ -345,11 +345,6 @@ static inline u32 msg_get_stream_type(u32 msg)
return (msg & IPC_STR_TYPE_MASK) >>  IPC_STR_TYPE_SHIFT;
 }
 
-static inline u32 msg_get_stage_type(u32 msg)
-{
-   return (msg & IPC_STG_TYPE_MASK) >>  IPC_STG_TYPE_SHIFT;
-}
-
 static inline u32 msg_get_stream_id(u32 msg)
 {
return (msg & IPC_STR_ID_MASK) >>  IPC_STR_ID_SHIFT;
@@ -666,13 +661,12 @@ static int hsw_module_message(struct sst_hsw *hsw, u32 
header)
 
 static int hsw_stream_message(struct sst_hsw *hsw, u32 header)
 {
-   u32 stream_msg, stream_id, stage_type;
+   u32 stream_msg, stream_id;
struct sst_hsw_stream *stream;
int handled = 0;
 
stream_msg = msg_get_stream_type(header);
stream_id = msg_get_stream_id(header);
-   stage_type = msg_get_stage_type(header);
 
stream = get_stream_by_id(hsw, stream_id);
if (stream == NULL)
-- 
2.7.4




[PATCH v3] serial: Add Milbeaut serial control

2019-04-17 Thread Sugaya Taichi
Add Milbeaut serial control including earlycon and console.

Signed-off-by: Sugaya Taichi 
---
Changes from v2:
 - Fix build warning.

Changes from v1:
 - Add "COMPILE_TEST" dependency for coverage test.

 drivers/tty/serial/Kconfig |  26 ++
 drivers/tty/serial/Makefile|   1 +
 drivers/tty/serial/milbeaut_usio.c | 621 +
 include/uapi/linux/serial_core.h   |   3 +
 4 files changed, 651 insertions(+)
 create mode 100644 drivers/tty/serial/milbeaut_usio.c

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 72966bc..d1971a8 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1582,6 +1582,32 @@ config SERIAL_RDA_CONSOLE
  Say 'Y' here if you wish to use the RDA8810PL UART as the system
  console. Only earlycon is implemented currently.
 
+config SERIAL_MILBEAUT_USIO
+   tristate "Milbeaut USIO/UART serial port support"
+   depends on ARCH_MILBEAUT || (COMPILE_TEST && OF)
+   default ARCH_MILBEAUT
+   select SERIAL_CORE
+   help
+ This selects the USIO/UART IP found in Socionext Milbeaut SoCs.
+
+config SERIAL_MILBEAUT_USIO_PORTS
+   int "Maximum number of CSIO/UART ports (1-8)"
+   range 1 8
+   depends on SERIAL_MILBEAUT_USIO
+   default "4"
+
+config SERIAL_MILBEAUT_USIO_CONSOLE
+   bool "Support for console on MILBEAUT USIO/UART serial port"
+   depends on SERIAL_MILBEAUT_USIO=y
+   default y
+   select SERIAL_CORE_CONSOLE
+   select SERIAL_EARLYCON
+   help
+ Say 'Y' here if you wish to use a USIO/UART of Socionext Milbeaut
+ SoCs as the system console (the system console is the device which
+ receives all kernel messages and warnings and which allows logins in
+ single user mode).
+
 endmenu
 
 config SERIAL_MCTRL_GPIO
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 40b702a..43ca2d0 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_SERIAL_PIC32)+= pic32_uart.o
 obj-$(CONFIG_SERIAL_MPS2_UART) += mps2-uart.o
 obj-$(CONFIG_SERIAL_OWL)   += owl-uart.o
 obj-$(CONFIG_SERIAL_RDA)   += rda-uart.o
+obj-$(CONFIG_SERIAL_MILBEAUT_USIO) += milbeaut_usio.o
 
 # GPIOLIB helpers for modem control lines
 obj-$(CONFIG_SERIAL_MCTRL_GPIO)+= serial_mctrl_gpio.o
diff --git a/drivers/tty/serial/milbeaut_usio.c 
b/drivers/tty/serial/milbeaut_usio.c
new file mode 100644
index 000..4a10604
--- /dev/null
+++ b/drivers/tty/serial/milbeaut_usio.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Socionext Inc.
+ */
+
+#if defined(CONFIG_SERIAL_MILBEAUT_USIO_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+#define SUPPORT_SYSRQ
+#endif
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define USIO_NAME  "mlb-usio-uart"
+#define USIO_UART_DEV_NAME "ttyUSI"
+
+static struct uart_port mlb_usio_ports[CONFIG_SERIAL_MILBEAUT_USIO_PORTS];
+
+#define RX 0
+#define TX 1
+static int mlb_usio_irq[CONFIG_SERIAL_MILBEAUT_USIO_PORTS][2];
+
+#define MLB_USIO_REG_SMR   0
+#define MLB_USIO_REG_SCR   1
+#define MLB_USIO_REG_ESCR  2
+#define MLB_USIO_REG_SSR   3
+#define MLB_USIO_REG_DR4
+#define MLB_USIO_REG_BGR   6
+#define MLB_USIO_REG_FCR   12
+#define MLB_USIO_REG_FBYTE 14
+
+#define MLB_USIO_SMR_SOE   BIT(0)
+#define MLB_USIO_SMR_SBL   BIT(3)
+#define MLB_USIO_SCR_TXE   BIT(0)
+#define MLB_USIO_SCR_RXE   BIT(1)
+#define MLB_USIO_SCR_TBIE  BIT(2)
+#define MLB_USIO_SCR_TIE   BIT(3)
+#define MLB_USIO_SCR_RIE   BIT(4)
+#define MLB_USIO_SCR_UPCL  BIT(7)
+#define MLB_USIO_ESCR_L_8BIT   0
+#define MLB_USIO_ESCR_L_5BIT   1
+#define MLB_USIO_ESCR_L_6BIT   2
+#define MLB_USIO_ESCR_L_7BIT   3
+#define MLB_USIO_ESCR_PBIT(3)
+#define MLB_USIO_ESCR_PEN  BIT(4)
+#define MLB_USIO_ESCR_FLWENBIT(7)
+#define MLB_USIO_SSR_TBI   BIT(0)
+#define MLB_USIO_SSR_TDRE  BIT(1)
+#define MLB_USIO_SSR_RDRF  BIT(2)
+#define MLB_USIO_SSR_ORE   BIT(3)
+#define MLB_USIO_SSR_FRE   BIT(4)
+#define MLB_USIO_SSR_PEBIT(5)
+#define MLB_USIO_SSR_REC   BIT(7)
+#define MLB_USIO_SSR_BRK   BIT(8)
+#define MLB_USIO_FCR_FE1   BIT(0)
+#define MLB_USIO_FCR_FE2   BIT(1)
+#define MLB_USIO_FCR_FCL1  BIT(2)
+#define MLB_USIO_FCR_FCL2  BIT(3)
+#define MLB_USIO_FCR_FSET  BIT(4)
+#define MLB_USIO_FCR_FTIE  BIT(9)
+#define MLB_USIO_FCR_FDRQ  BIT(10)
+#define MLB_USIO_FCR_FRIIE BIT(11)
+
+static void 

Re: kernel BUG at kernel/cred.c:434!

2019-04-17 Thread Yang Yingliang

Hi, Casey

On 2019/4/18 8:24, Casey Schaufler wrote:

On 4/17/2019 4:39 PM, Paul Moore wrote:

On Wed, Apr 17, 2019 at 12:27 PM Oleg Nesterov  wrote:

On 04/17, Paul Moore wrote:
On Wed, Apr 17, 2019 at 10:57 AM Oleg Nesterov  
wrote:

On 04/17, Paul Moore wrote:

I'm tempted to simply return an error in selinux_setprocattr() if
the task's credentials are not the same as its real_cred;

What about other modules? I have no idea what smack_setprocattr() is,
but it too does prepare_creds/commit creds.

it seems that the simplest workaround should simply add the 
additional

cred == real_cred into proc_pid_attr_write().

Yes, that is simple, but I worry about what other LSMs might want to
do.  While I believe failing if the effective creds are not the same
as the real_creds is okay for SELinux (possibly Smack too), I worry
about what other LSMs may want to do.  After all,
proc_pid_attr_write() doesn't change the the creds itself, that is
something the specific LSMs do.

Yes, but if proc_pid_attr_write() is called with cred != real_cred then
something is already wrong?

True, or at least I would think so.

Looking at the current tree there are three LSMs which implement
setprocattr hooks: SELinux, Smack, and AppArmor.  I know Casey has
already mentioned that he wasn't able to trigger the problem in Smack,
but looking at smack_setprocattr() I see the similar commit_creds()
usage so I would expect the same problem in Smack; what say you Casey?


I say that my test program runs without ill effect. I call acct()
with "/proc/self/attr/current", which succeeds and enables accounting
just like it is supposed to. I then have the program open
"/proc/self/attr/current" and read it, all of which goes swimmingly.
When Smack frees a cred it usually does not free any memory of its
own, so it is conceivable that I'm just getting lucky. Or, I may not
have sufficient debug enabled.


  Looking at apparmor_setprocattr(), it appears that it too could end
up calling commit_creds() via aa_set_current_hat().

Since it looks like all three LSMs which implement the setprocattr
hook are vulnerable I'm open to the idea that proc_pid_attr_write() is
a better choice for the cred != read_cred check, but I would want to
make sure John and Casey are okay with that.

John?

Casey?


I'm fine with the change going into proc_pid_attr_write().

The cred != real_cred checking is not enough.

Consider this situation, when doing override, cred, real_cred and 
new_cred are all same:


after override_creds()cred == real_cred == new1_cred
after prepare_creds() new2_cred
after commit_creds() becasue the check is false, so cred == 
real_cred == new2_cred

after revert_creds()cred == new1_cred, real_cred == new2_cred

It will cause cred != real_cred finally.


Regards,
Yang





Re: [PATCH v2] ktest: Add support for meta characters in GRUB_MENU

2019-04-17 Thread Steven Rostedt
On Wed, 17 Apr 2019 19:58:23 -0400
Masayoshi Mizuma  wrote:

> From: Masayoshi Mizuma 
> 
> ktest fails if meta characters are in GRUB_MENU, for example
> GRUB_MENU = 'Fedora (test)'
> 
> The failure happens because the meta characters are not escaped,
> so the menu doesn't match in any entries in GRUB_FILE.
> 
> Use quotemeta() to escape the meta characters.
> 
> Signed-off-by: Masayoshi Mizuma 

I applied this patch to my tree but haven't pushed it out. The ktest.pl
I use to test my kernels is a soft link to my development tree (where I
just added this patch). If it doesn't cause me any issues, I'll push it
to linux-next in a week or so and then push it to Linus after that.

-- Steve


> ---
>  tools/testing/ktest/ktest.pl | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
> index 87af8a68ab25..fc6140d45280 100755
> --- a/tools/testing/ktest/ktest.pl
> +++ b/tools/testing/ktest/ktest.pl
> @@ -1866,9 +1866,10 @@ sub get_grub2_index {
>   or dodie "unable to get $grub_file";
>  
>  my $found = 0;
> +my $grub_menu_qt = quotemeta($grub_menu);
>  
>  while () {
> - if (/^menuentry.*$grub_menu/) {
> + if (/^menuentry.*$grub_menu_qt/) {
>   $grub_number++;
>   $found = 1;
>   last;
> @@ -1909,9 +1910,10 @@ sub get_grub_index {
>   or dodie "unable to get menu.lst";
>  
>  my $found = 0;
> +my $grub_menu_qt = quotemeta($grub_menu);
>  
>  while () {
> - if (/^\s*title\s+$grub_menu\s*$/) {
> + if (/^\s*title\s+$grub_menu_qt\s*$/) {
>   $grub_number++;
>   $found = 1;
>   last;



Re: [PATCH 0/2] fix leaked of_node references in drivers/power

2019-04-17 Thread Sebastian Reichel
Hi,

On Wed, Apr 17, 2019 at 10:43:01AM +0800, Wen Yang wrote:
> The call to of_get_cpu_node/of_find_compatible_node/of_parse_phandle...
> returns a node pointer with refcount incremented thus it must be
> explicitly decremented after the last usage.
> 
> We developed a coccinelle SmPL to detect drivers/power code and
> found some issues.
> This patch series fixes those issues.
> 
> Cc: Sebastian Reichel 
> Cc: linux...@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> 
> Wen Yang (2):
>   power: supply: fix leaked of_node refs in ab8500_bm_of_probe
>   power: supply: fix leaked of_node refs in
> power_supply_get_battery_info
> 
>  drivers/power/supply/ab8500_bmdata.c |  1 +
>  drivers/power/supply/power_supply_core.c | 24 
>  2 files changed, 17 insertions(+), 8 deletions(-)

Thanks, queued.

-- Sebastian


signature.asc
Description: PGP signature


Re: [PATCH v4 1/2] power_supply: Add more charge types and CHARGE_CONTROL_* properties

2019-04-17 Thread Sebastian Reichel
Hi,

The changes itself look all good to me, but this does multiple
things in a single patch, so please split it into multiple commits.

-- Sebastian

On Tue, Apr 16, 2019 at 06:43:19PM -0600, Nick Crews wrote:
> Add "Standard", "Adaptive", and "Custom" modes to the charge_type
> property, to expand the existing "Trickle" and "Fast" modes.
> In addition, add POWER_SUPPLY_PROP_CHARGE_CONTROL_START_THRESHOLD
> and POWER_SUPPLY_PROP_CHARGE_CONTROL_END_THRESHOLD properties, to expand
> the existing CHARGE_CONTROL_* properties. I am adding them in order
> to support a new Chrome OS device, but these properties should be
> general enough that they can be used on other devices.
> 
> The meaning of "Standard" is obvious, but "Adaptive" and "Custom" are
> more tricky: "Adaptive" means that the charge controller uses some
> custom algorithm to change the charge type automatically, with no
> configuration needed. "Custom" means that the charge controller uses the
> POWER_SUPPLY_PROP_CHARGE_CONTROL_* properties as configuration for some
> other algorithm. For example, in the use case that I am supporting,
> this means the battery begins charging when the percentage
> level drops below POWER_SUPPLY_PROP_CHARGE_CONTROL_START_THRESHOLD and
> charging ceases when the percentage level goes above
> POWER_SUPPLY_PROP_CHARGE_CONTROL_END_THRESHOLD.
> 
> v4 changes:
> - Add documentation for the new properties, and add documentation for
>   the the previously missing charge_control_limit and
>   charge_control_limit_max properties.
> 
> Signed-off-by: Nick Crews 
> ---
>  Documentation/ABI/testing/sysfs-class-power | 51 +++--
>  drivers/power/supply/power_supply_sysfs.c   |  4 +-
>  include/linux/power_supply.h| 10 +++-
>  3 files changed, 59 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-class-power 
> b/Documentation/ABI/testing/sysfs-class-power
> index 5e23e22dce1b..b77e30b9014e 100644
> --- a/Documentation/ABI/testing/sysfs-class-power
> +++ b/Documentation/ABI/testing/sysfs-class-power
> @@ -114,15 +114,60 @@ Description:
>   Access: Read
>   Valid values: Represented in microamps
>  
> +What:
> /sys/class/power_supply//charge_control_limit
> +Date:Oct 2012
> +Contact: linux...@vger.kernel.org
> +Description:
> + Maximum allowable charging current. Used for charge rate
> + throttling for thermal cooling or improving battery health.
> +
> + Access: Read, Write
> + Valid values: Represented in microamps
> +
> +What:
> /sys/class/power_supply//charge_control_limit_max
> +Date:Oct 2012
> +Contact: linux...@vger.kernel.org
> +Description:
> + Maximum legal value for the charge_control_limit property.
> +
> + Access: Read
> + Valid values: Represented in microamps
> +
> +What:
> /sys/class/power_supply//charge_control_start_threshold
> +Date:April 2019
> +Contact: linux...@vger.kernel.org
> +Description:
> + Represents a battery percentage level, below which charging will
> + begin.
> +
> + Access: Read, Write
> + Valid values: 0 - 100 (percent)
> +
> +What:
> /sys/class/power_supply//charge_control_end_threshold
> +Date:April 2019
> +Contact: linux...@vger.kernel.org
> +Description:
> + Represents a battery percentage level, above which charging will
> + stop.
> +
> + Access: Read, Write
> + Valid values: 0 - 100 (percent)
> +
>  What:/sys/class/power_supply//charge_type
>  Date:July 2009
>  Contact: linux...@vger.kernel.org
>  Description:
>   Represents the type of charging currently being applied to the
> - battery.
> + battery. "Trickle", "Fast", and "Standard" all mean different
> + charging speeds. "Adaptive" means that the charger uses some
> + algorithm to adjust the charge rate dynamically, without
> + any user configuration required. "Custom" means that the charger
> + uses the charge_control_* properties as configuration for some
> + different algorithm.
>  
> - Access: Read
> - Valid values: "Unknown", "N/A", "Trickle", "Fast"
> + Access: Read, Write
> + Valid values: "Unknown", "N/A", "Trickle", "Fast", "Standard",
> +   "Adaptive", "Custom"
>  
>  What:
> /sys/class/power_supply//charge_term_current
>  Date:July 2014
> diff --git a/drivers/power/supply/power_supply_sysfs.c 
> b/drivers/power/supply/power_supply_sysfs.c
> index dce24f596160..6104a3f03d46 100644
> --- a/drivers/power/supply/power_supply_sysfs.c
> +++ b/drivers/power/supply/power_supply_sysfs.c
> @@ -56,7 +56,7 

Re: [PATCH] power: supply: sysfs: prevent endless uevent loop with CONFIG_POWER_SUPPLY_DEBUG

2019-04-17 Thread Sebastian Reichel
Hi,

On Fri, Apr 05, 2019 at 12:30:20AM -0700, Andrey Smirnov wrote:
> Fix a similar endless event loop as was done in commit 8dcf32175b4e
> ("i2c: prevent endless uevent loop with CONFIG_I2C_DEBUG_CORE"):
> 
>   The culprit is the dev_dbg printk in the i2c uevent handler. If
>   this is activated (for instance by CONFIG_I2C_DEBUG_CORE) it results
>   in an endless loop with systemd-journald.
> 
>   This happens if user-space scans the system log and reads the uevent
>   file to get information about a newly created device, which seems
>   fair use to me. Unfortunately reading the "uevent" file uses the
>   same function that runs for creating the uevent for a new device,
>   generating the next syslog entry
> 
> Both CONFIG_I2C_DEBUG_CORE and CONFIG_POWER_SUPPLY_DEBUG were reported
> in https://bugs.freedesktop.org/show_bug.cgi?id=76886 but only former
> seems to have been fixed. Change debug prints to use pr_debug insted
> to resolve the issue.
> 
> Signed-off-by: Andrey Smirnov 
> Cc: Chris Healy 
> Cc: Sebastian Reichel 
> Cc: linux...@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> ---

I think we should just drop these debug messages, just like I2C did.
They don't offer any useful information considering the info is already
exposed to userspace via the uevent.

-- Sebastian

>  drivers/power/supply/power_supply_sysfs.c | 7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/power/supply/power_supply_sysfs.c 
> b/drivers/power/supply/power_supply_sysfs.c
> index dce24f596160..5e91946731fd 100644
> --- a/drivers/power/supply/power_supply_sysfs.c
> +++ b/drivers/power/supply/power_supply_sysfs.c
> @@ -383,14 +383,14 @@ int power_supply_uevent(struct device *dev, struct 
> kobj_uevent_env *env)
>   char *prop_buf;
>   char *attrname;
>  
> - dev_dbg(dev, "uevent\n");
> + pr_debug("%s: uevent\n", dev_name(dev));
>  
>   if (!psy || !psy->desc) {
>   dev_dbg(dev, "No power supply yet\n");
>   return ret;
>   }
>  
> - dev_dbg(dev, "POWER_SUPPLY_NAME=%s\n", psy->desc->name);
> + pr_debug("%s: POWER_SUPPLY_NAME=%s\n", dev_name(dev), psy->desc->name);
>  
>   ret = add_uevent_var(env, "POWER_SUPPLY_NAME=%s", psy->desc->name);
>   if (ret)
> @@ -427,7 +427,8 @@ int power_supply_uevent(struct device *dev, struct 
> kobj_uevent_env *env)
>   goto out;
>   }
>  
> - dev_dbg(dev, "prop %s=%s\n", attrname, prop_buf);
> + pr_debug("%s: prop %s=%s\n", dev_name(dev), attrname,
> +  prop_buf);
>  
>   ret = add_uevent_var(env, "POWER_SUPPLY_%s=%s", attrname, 
> prop_buf);
>   kfree(attrname);
> -- 
> 2.20.1
> 


signature.asc
Description: PGP signature


Re: [PATCH v9 02/11] dt-bindings: power: supply: add DT bindings for max77650

2019-04-17 Thread Sebastian Reichel
Hi,

On Thu, Apr 11, 2019 at 02:42:03PM +0200, Bartosz Golaszewski wrote:
> From: Bartosz Golaszewski 
> 
> Add the DT binding document for the battery charger module of max77650.
> 
> Signed-off-by: Bartosz Golaszewski 
> ---
>  .../power/supply/max77650-charger.txt | 28 +++
>  1 file changed, 28 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/power/supply/max77650-charger.txt
> 
> diff --git 
> a/Documentation/devicetree/bindings/power/supply/max77650-charger.txt 
> b/Documentation/devicetree/bindings/power/supply/max77650-charger.txt
> new file mode 100644
> index ..821a2240e70f
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/power/supply/max77650-charger.txt
> @@ -0,0 +1,28 @@
> +Battery charger driver for MAX77650 PMIC from Maxim Integrated.
> +
> +This module is part of the MAX77650 MFD device. For more details
> +see Documentation/devicetree/bindings/mfd/max77650.txt.
> +
> +The charger is represented as a sub-node of the PMIC node on the device tree.
> +
> +Required properties:
> +
> +- compatible:Must be "maxim,max77650-charger"
> +
> +Optional properties:
> +
> +- input-voltage-min-microvolt:   Minimum CHGIN regulation voltage. Must 
> be one
> + of: 400, 410, 420, 430,
> + 440, 450, 460, 470.
> +- input-current-limit-microamp:  CHGIN input current limit (in 
> microamps). Must
> + be one of: 95000, 19, 285000, 38,
> + 475000.

The binding looks good to me.

> +Example:
> +
> +
> + charger {
> + compatible = "maxim,max77650-charger";
> + min-microvolt = <420>;
> + curr-lim-microamp = <285000>;
> + };

The example does not match the binding.

-- Sebastian


signature.asc
Description: PGP signature


Re: [PATCH v9 07/11] power: supply: max77650: add support for battery charger

2019-04-17 Thread Sebastian Reichel
Hi,

On Thu, Apr 11, 2019 at 02:42:08PM +0200, Bartosz Golaszewski wrote:
> From: Bartosz Golaszewski 
> 
> Add basic support for the battery charger for max77650 PMIC.
> 
> Signed-off-by: Bartosz Golaszewski 
> Reviewed-by: Linus Walleij 
> ---

Reviewed-by: Sebastian Reichel 

-- Sebastian

>  drivers/power/supply/Kconfig|   7 +
>  drivers/power/supply/Makefile   |   1 +
>  drivers/power/supply/max77650-charger.c | 368 
>  3 files changed, 376 insertions(+)
>  create mode 100644 drivers/power/supply/max77650-charger.c
> 
> diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig
> index e901b9879e7e..0230c96fa94d 100644
> --- a/drivers/power/supply/Kconfig
> +++ b/drivers/power/supply/Kconfig
> @@ -499,6 +499,13 @@ config CHARGER_DETECTOR_MAX14656
> Revision 1.2 and can be found e.g. in Kindle 4/5th generation
> readers and certain LG devices.
>  
> +config CHARGER_MAX77650
> + tristate "Maxim MAX77650 battery charger driver"
> + depends on MFD_MAX77650
> + help
> +   Say Y to enable support for the battery charger control of MAX77650
> +   PMICs.
> +
>  config CHARGER_MAX77693
>   tristate "Maxim MAX77693 battery charger driver"
>   depends on MFD_MAX77693
> diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile
> index b731c2a9b695..b73eb8c5c1a9 100644
> --- a/drivers/power/supply/Makefile
> +++ b/drivers/power/supply/Makefile
> @@ -70,6 +70,7 @@ obj-$(CONFIG_CHARGER_MANAGER)   += charger-manager.o
>  obj-$(CONFIG_CHARGER_LTC3651)+= ltc3651-charger.o
>  obj-$(CONFIG_CHARGER_MAX14577)   += max14577_charger.o
>  obj-$(CONFIG_CHARGER_DETECTOR_MAX14656)  += max14656_charger_detector.o
> +obj-$(CONFIG_CHARGER_MAX77650)   += max77650-charger.o
>  obj-$(CONFIG_CHARGER_MAX77693)   += max77693_charger.o
>  obj-$(CONFIG_CHARGER_MAX8997)+= max8997_charger.o
>  obj-$(CONFIG_CHARGER_MAX8998)+= max8998_charger.o
> diff --git a/drivers/power/supply/max77650-charger.c 
> b/drivers/power/supply/max77650-charger.c
> new file mode 100644
> index ..e34714cb05ec
> --- /dev/null
> +++ b/drivers/power/supply/max77650-charger.c
> @@ -0,0 +1,368 @@
> +// SPDX-License-Identifier: GPL-2.0
> +//
> +// Copyright (C) 2018 BayLibre SAS
> +// Author: Bartosz Golaszewski 
> +//
> +// Battery charger driver for MAXIM 77650/77651 charger/power-supply.
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#define MAX77650_CHARGER_ENABLED BIT(0)
> +#define MAX77650_CHARGER_DISABLED0x00
> +#define MAX77650_CHARGER_CHG_EN_MASK BIT(0)
> +
> +#define MAX77650_CHG_DETAILS_MASKGENMASK(7, 4)
> +#define MAX77650_CHG_DETAILS_BITS(_reg) \
> + (((_reg) & MAX77650_CHG_DETAILS_MASK) >> 4)
> +
> +/* Charger is OFF. */
> +#define MAX77650_CHG_OFF 0x00
> +/* Charger is in prequalification mode. */
> +#define MAX77650_CHG_PREQ0x01
> +/* Charger is in fast-charge constant current mode. */
> +#define MAX77650_CHG_ON_CURR 0x02
> +/* Charger is in JEITA modified fast-charge constant-current mode. */
> +#define MAX77650_CHG_ON_CURR_JEITA   0x03
> +/* Charger is in fast-charge constant-voltage mode. */
> +#define MAX77650_CHG_ON_VOLT 0x04
> +/* Charger is in JEITA modified fast-charge constant-voltage mode. */
> +#define MAX77650_CHG_ON_VOLT_JEITA   0x05
> +/* Charger is in top-off mode. */
> +#define MAX77650_CHG_ON_TOPOFF   0x06
> +/* Charger is in JEITA modified top-off mode. */
> +#define MAX77650_CHG_ON_TOPOFF_JEITA 0x07
> +/* Charger is done. */
> +#define MAX77650_CHG_DONE0x08
> +/* Charger is JEITA modified done. */
> +#define MAX77650_CHG_DONE_JEITA  0x09
> +/* Charger is suspended due to a prequalification timer fault. */
> +#define MAX77650_CHG_SUSP_PREQ_TIM_FAULT 0x0a
> +/* Charger is suspended due to a fast-charge timer fault. */
> +#define MAX77650_CHG_SUSP_FAST_CHG_TIM_FAULT 0x0b
> +/* Charger is suspended due to a battery temperature fault. */
> +#define MAX77650_CHG_SUSP_BATT_TEMP_FAULT0x0c
> +
> +#define MAX77650_CHGIN_DETAILS_MASK  GENMASK(3, 2)
> +#define MAX77650_CHGIN_DETAILS_BITS(_reg) \
> + (((_reg) & MAX77650_CHGIN_DETAILS_MASK) >> 2)
> +
> +#define MAX77650_CHGIN_UNDERVOLTAGE_LOCKOUT  0x00
> +#define MAX77650_CHGIN_OVERVOLTAGE_LOCKOUT   0x01
> +#define MAX77650_CHGIN_OKAY  0x11
> +
> +#define MAX77650_CHARGER_CHG_MASKBIT(1)
> +#define MAX77650_CHARGER_CHG_CHARGING(_reg) \
> + (((_reg) & MAX77650_CHARGER_CHG_MASK) > 1)
> +
> +#define MAX77650_CHARGER_VCHGIN_MIN_MASK 0xc0
> +#define MAX77650_CHARGER_VCHGIN_MIN_SHIFT(_val)  ((_val) << 5)
> +
> +#define MAX77650_CHARGER_ICHGIN_LIM_MASK 0x1c
> +#define 

Re: [PATCH v2 1/2] power: reset: nvmem-reboot-mode: use NVMEM as reboot mode write interface

2019-04-17 Thread Sebastian Reichel
On Thu, Apr 11, 2019 at 05:54:09AM +, Han Nandor wrote:
> Add a new reboot mode write interface that is using an NVMEM cell
> to store the reboot mode magic.
> 
> Signed-off-by: Nandor Han 
> ---
>  drivers/power/reset/Kconfig |  9 +++
>  drivers/power/reset/Makefile|  1 +
>  drivers/power/reset/nvmem-reboot-mode.c | 76 +
>  3 files changed, 86 insertions(+)
>  create mode 100644 drivers/power/reset/nvmem-reboot-mode.c
> 
> diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
> index 6533aa560aa1..bb4a4e854f96 100644
> --- a/drivers/power/reset/Kconfig
> +++ b/drivers/power/reset/Kconfig
> @@ -245,5 +245,14 @@ config POWER_RESET_SC27XX
> PMICs includes the SC2720, SC2721, SC2723, SC2730
> and SC2731 chips.
>  
> +config NVMEM_REBOOT_MODE
> + tristate "Generic NVMEM reboot mode driver"
> + select REBOOT_MODE
> + help
> +   Say y here will enable reboot mode driver. This will
> +   get reboot mode arguments and store it in a NVMEM cell,
> +   then the bootloader can read it and take different
> +   action according to the mode.
> +
>  endif
>  
> diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile
> index 0aebee954ac1..85da3198e4e0 100644
> --- a/drivers/power/reset/Makefile
> +++ b/drivers/power/reset/Makefile
> @@ -29,3 +29,4 @@ obj-$(CONFIG_POWER_RESET_ZX) += zx-reboot.o
>  obj-$(CONFIG_REBOOT_MODE) += reboot-mode.o
>  obj-$(CONFIG_SYSCON_REBOOT_MODE) += syscon-reboot-mode.o
>  obj-$(CONFIG_POWER_RESET_SC27XX) += sc27xx-poweroff.o
> +obj-$(CONFIG_NVMEM_REBOOT_MODE) += nvmem-reboot-mode.o
> diff --git a/drivers/power/reset/nvmem-reboot-mode.c 
> b/drivers/power/reset/nvmem-reboot-mode.c
> new file mode 100644
> index ..816cfdab16a7
> --- /dev/null
> +++ b/drivers/power/reset/nvmem-reboot-mode.c
> @@ -0,0 +1,76 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Copyright (c) Vaisala Oyj. All rights reserved.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +struct nvmem_reboot_mode {
> + struct reboot_mode_driver reboot;
> + struct nvmem_cell *cell;
> +};
> +
> +static int nvmem_reboot_mode_write(struct reboot_mode_driver *reboot,
> + unsigned int magic)
> +{
> + int ret;
> + struct nvmem_reboot_mode *nvmem_rbm;
> +
> + nvmem_rbm = container_of(reboot, struct nvmem_reboot_mode, reboot);
> +
> + ret = nvmem_cell_write(nvmem_rbm->cell, , sizeof(magic));
> + if (ret < 0)
> + dev_err(reboot->dev, "update reboot mode bits failed\n");
> +
> + return ret;
> +}
> +
> +static int nvmem_reboot_mode_probe(struct platform_device *pdev)
> +{
> + int ret;
> + struct nvmem_reboot_mode *nvmem_rbm;
> +
> + nvmem_rbm = devm_kzalloc(>dev, sizeof(*nvmem_rbm), GFP_KERNEL);
> + if (!nvmem_rbm)
> + return -ENOMEM;
> +
> + nvmem_rbm->reboot.dev = >dev;
> + nvmem_rbm->reboot.write = nvmem_reboot_mode_write;
> +
> + nvmem_rbm->cell = devm_nvmem_cell_get(>dev, "reboot-mode");
> + if (IS_ERR(nvmem_rbm->cell)) {
> + dev_err(>dev, "failed to get the nvmem cell 
> reboot-mode\n");
> + return PTR_ERR(nvmem_rbm->cell);
> + }
> +
> + ret = devm_reboot_mode_register(>dev, _rbm->reboot);
> + if (ret)
> + dev_err(>dev, "can't register reboot mode\n");
> +
> + return ret;
> +}
> +
> +static const struct of_device_id nvmem_reboot_mode_of_match[] = {
> + { .compatible = "nvmem-reboot-mode" },
> + {}
> +};
> +MODULE_DEVICE_TABLE(of, nvmem_reboot_mode_of_match);
> +
> +static struct platform_driver nvmem_reboot_mode_driver = {
> + .probe = nvmem_reboot_mode_probe,
> + .driver = {
> + .name = "nvmem-reboot-mode",
> + .of_match_table = nvmem_reboot_mode_of_match,
> + },
> +};
> +module_platform_driver(nvmem_reboot_mode_driver);
> +
> +MODULE_AUTHOR("Nandor Han ");
> +MODULE_DESCRIPTION("NVMEM reboot mode driver");
> +MODULE_LICENSE("GPL v2");

I suppose as of bf7fbeeae6db "GPL v2" is deprecated, before it was
often read as GPL v2 only. In both cases it makes sense to just
use "GPL". Otherwise the driver looks fine to me, but I'm waiting
for Rob's review of the DT bindings before merging this.

-- Sebastian


signature.asc
Description: PGP signature


Re: [PATCH v4 0/7] ARM: sun8i: a83t: Enable USB OTG

2019-04-17 Thread Sebastian Reichel
Hi,

On Tue, Apr 16, 2019 at 02:40:17PM +0800, Chen-Yu Tsai wrote:
> This is v4 of my A83T USB power supply / OTG series. Hopefully this is
> the last revision even though it's kind of late in the -rc cycle for
> the patches to make the next release. Fingers crossed.

Patches 1-5 (i.e. all except DTS) queued to power-supply subsystem.

-- Sebastian


signature.asc
Description: PGP signature


EDAC: Fix memory leak in creating CSROW object

2019-04-17 Thread Pan Bian
In the function that creates a CSROW object, the object is not released
when failing to add the device to device hierarchy. This may result in a
memory leak bug.

Signed-off-by: Pan Bian 
---
 drivers/edac/edac_mc_sysfs.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 4641746..2dafb08 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -404,6 +404,7 @@ static inline int nr_pages_per_csrow(struct csrow_info 
*csrow)
 static int edac_create_csrow_object(struct mem_ctl_info *mci,
struct csrow_info *csrow, int index)
 {
+   int err;
csrow->dev.type = _attr_type;
csrow->dev.groups = csrow_dev_groups;
device_initialize(>dev);
@@ -415,7 +416,10 @@ static int edac_create_csrow_object(struct mem_ctl_info 
*mci,
edac_dbg(0, "creating (virtual) csrow node %s\n",
 dev_name(>dev));
 
-   return device_add(>dev);
+   err = device_add(>dev);
+   if (err)
+   put_device(>dev);
+   return err;
 }
 
 /* Create a CSROW object under specifed edac_mc_device */
-- 
2.7.4




Re: [PATCH] ktest: Add workaround to avoid unexpected power cycle

2019-04-17 Thread Steven Rostedt
On Wed, 17 Apr 2019 16:59:13 -0400
Masayoshi Mizuma  wrote:

> > > diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
> > > index ea07d43856b8..765c6bc83ab4 100755
> > > --- a/tools/testing/ktest/ktest.pl
> > > +++ b/tools/testing/ktest/ktest.pl
> > > @@ -1737,6 +1737,11 @@ sub run_command {
> > >  my $dord = 0;
> > >  my $dostdout = 0;
> > >  my $pid;
> > > +my $is_default_reboot = 0;
> > > +
> > > +if ($command eq $default{REBOOT}) {
> > > + $is_default_reboot = 1;
> > > +}  
> > 
> > Do we really need to add this variable?
> >   
> 
> > >  
> > >  $command =~ s/\$SSH_USER/$ssh_user/g;
> > >  $command =~ s/\$MACHINE/$machine/g;  
> 
> $command is modified here, so...

Ah thanks. This is what I get for reviewing patches without looking at
the code it touches ;-)

> 
> > > @@ -1791,6 +1796,10 @@ sub run_command {
> > >  # shift 8 for real exit status
> > >  $run_command_status = $? >> 8;
> > >  
> > > +if ($run_command_status == 255 && $is_default_reboot) {  
> > 
> > Instead can we have:
> > 
> > if ($command eq $default{REBOOT} &&
> > $run_command_status == $reboot_return_code) {
> > 
> > ?  
> 
> How about the following?
> 
> @@ -1737,6 +1740,7 @@ sub run_command {
>  my $dord = 0;
>  my $dostdout = 0;
>  my $pid;
> +my $command_orig = $command;
> 
>  $command =~ s/\$SSH_USER/$ssh_user/g;
>  $command =~ s/\$MACHINE/$machine/g;
> @@ -1791,6 +1795,11 @@ sub run_command {
>  # shift 8 for real exit status
>  $run_command_status = $? >> 8;
> 
> +if ($command_orig eq $default{REBOOT} &&
> +   $run_command_status == $reboot_return_code) {
> +   $run_command_status = 0;
> +}
> +

Looks fine to me.

Thanks!

-- Steve

>  close(CMD);
>  close(LOG) if ($dolog);
>  close(RD)  if ($dord);
> 
> Thanks!
> Masa



Re: [RFC PATCH v2] perf/x86: make perf callchain work without CONFIG_FRAME_POINTER

2019-04-17 Thread Josh Poimboeuf
On Wed, Apr 17, 2019 at 09:07:35AM +0200, Peter Zijlstra wrote:
> On Tue, Apr 16, 2019 at 03:15:59PM -0500, Josh Poimboeuf wrote:
> > If you do the regs->eflags thing to mark the regs as fake in
> > (perf_arch_fetch_caller_regs()), then I don't think skip_sp would be
> > needed, because regs->sp could probably mark the skip point.
> > 
> > Instead I was actually hoping we could get rid of fake regs and
> > perf_arch_fetch_caller_regs() altogether, because it's a nasty hack.
> > But I don't know what else those fake regs are used for.
> 
> This is the generic perf generate a sample path. It doesn't know the
> context. Normally it is an interrupt or exception of sorts and we simply
> pass the pt_regs from that down the chain and all is good.
> 
> It is just for a few software events, such as SW_CONTEXT_SWITCH,
> SW_MIGRATIONS and all the TP muck that we do not have regs to pass down.
> 
> These regs are used by:
> 
>  - SAMPLE_IP,
>  - SAMPLE_CALLCHAIN (this here issue),
>  - SAMPLE_REGS_INTR,
>  - a few misc bits
> 
> There's actually comment on top of perf_fetch_caller_regs() that tries
> to explain what is needed -- although I suppose that could do with an
> update.

Ok, I guess it makes a little more sense now to my perf-ignorant brain.
An improved comment would definitely help.

Sounds like using a reserved regs->flags bit to indicate fake regs is
the way to go.

-- 
Josh


Input: synaptics-rmi4: fix possible double free

2019-04-17 Thread Pan Bian
The RMI4 function structure has been released in rmi_register_function
if error occurs. However, it will be released again in the function
rmi_create_function, which may result in a double-free bug.

Signed-off-by: Pan Bian 
---
 drivers/input/rmi4/rmi_driver.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index fc3ab93..7fb358f 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -860,7 +860,7 @@ static int rmi_create_function(struct rmi_device *rmi_dev,
 
error = rmi_register_function(fn);
if (error)
-   goto err_put_fn;
+   return error;
 
if (pdt->function_number == 0x01)
data->f01_container = fn;
@@ -870,10 +870,6 @@ static int rmi_create_function(struct rmi_device *rmi_dev,
list_add_tail(>node, >function_list);
 
return RMI_SCAN_CONTINUE;
-
-err_put_fn:
-   put_device(>dev);
-   return error;
 }
 
 void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake)
-- 
2.7.4




Re: [PATCH 4/5] mm: rework non-root kmem_cache lifecycle management

2019-04-17 Thread Shakeel Butt
On Wed, Apr 17, 2019 at 5:39 PM Roman Gushchin  wrote:
>
> On Wed, Apr 17, 2019 at 04:41:01PM -0700, Shakeel Butt wrote:
> > On Wed, Apr 17, 2019 at 2:55 PM Roman Gushchin  wrote:
> > >
> > > This commit makes several important changes in the lifecycle
> > > of a non-root kmem_cache, which also affect the lifecycle
> > > of a memory cgroup.
> > >
> > > Currently each charged slab page has a page->mem_cgroup pointer
> > > to the memory cgroup and holds a reference to it.
> > > Kmem_caches are held by the cgroup. On offlining empty kmem_caches
> > > are freed, all other are freed on cgroup release.
> >
> > No, they are not freed (i.e. destroyed) on offlining, only
> > deactivated. All memcg kmem_caches are freed/destroyed on memcg's
> > css_free.
>
> You're right, my bad. I was thinking about the corresponding sysfs entry
> when was writing it. We try to free it from the deactivation path too.
>
> >
> > >
> > > So the current scheme can be illustrated as:
> > > page->mem_cgroup->kmem_cache.
> > >
> > > To implement the slab memory reparenting we need to invert the scheme
> > > into: page->kmem_cache->mem_cgroup.
> > >
> > > Let's make every page to hold a reference to the kmem_cache (we
> > > already have a stable pointer), and make kmem_caches to hold a single
> > > reference to the memory cgroup.
> >
> > What about memcg_kmem_get_cache()? That function assumes that by
> > taking reference on memcg, it's kmem_caches will stay. I think you
> > need to get reference on the kmem_cache in memcg_kmem_get_cache()
> > within the rcu lock where you get the memcg through css_tryget_online.
>
> Yeah, a very good question.
>
> I believe it's safe because css_tryget_online() guarantees that
> the cgroup is online and won't go offline before css_free() in
> slab_post_alloc_hook(). I do initialize kmem_cache's refcount to 1
> and drop it on offlining, so it protects the online kmem_cache.
>

Let's suppose a thread doing a remote charging calls
memcg_kmem_get_cache() and gets an empty kmem_cache of the remote
memcg having refcnt equal to 1. That thread got a reference on the
remote memcg but no reference on the kmem_cache. Let's suppose that
thread got stuck in the reclaim and scheduled away. In the meantime
that remote memcg got offlined and decremented the refcnt of all of
its kmem_caches. The empty kmem_cache which the thread stuck in
reclaim have pointer to can get deleted and may be using an already
destroyed kmem_cache after coming back from reclaim.

I think the above situation is possible unless the thread gets the
reference on the kmem_cache in memcg_kmem_get_cache().

Shakeel


Re: [PATCH v2] Input: uinput: Avoid Object-Already-Free with a global lock

2019-04-17 Thread dmitry.torok...@gmail.com
Hi Mukesh,

On Mon, Apr 15, 2019 at 03:35:51PM +0530, Mukesh Ojha wrote:
> 
> Hi Dmitry,
> 
> Can you please have a look at this patch ? as this seems to reproducing
> quite frequently
> 
> Thanks,
> Mukesh
> 
> On 4/10/2019 1:29 PM, Mukesh Ojha wrote:
> > uinput_destroy_device() gets called from two places. In one place,
> > uinput_ioctl_handler() where it is protected under a lock
> > udev->mutex but there is no protection on udev device from freeing
> > inside uinput_release().

uinput_release() should be called when last file handle to the uinput
instance is being dropped, so there should be no other users and thus we
can't be racing with anyone.

> > 
> > This can result in Object-Already-Free case where uinput parent
> > device already got freed while a child being inserted inside it.
> > That result in a double free case for parent while kernfs_put()
> > being done for child in a failure path of adding a node.

Can you please describe scenario in more detail? How do you free the
parent device while child input device is being registered?

Thanks.

- 
Dmitry


Re: [PATCH v3 1/3] dt-bindings: input: add GPIO controllable vibrator

2019-04-17 Thread Rob Herring
On Wed, Apr 17, 2019 at 11:02 AM Luca Weiss  wrote:
>
> On Freitag, 12. April 2019 17:06:23 CEST Luca Weiss wrote:
> > Provide a simple driver for GPIO controllable vibrators.
> > It will be used by the Fairphone 2.
> >
> > Signed-off-by: Luca Weiss 
> > ---
> >  .../bindings/input/gpio-vibrator.txt  | 20 +++
> >  1 file changed, 20 insertions(+)
> >  create mode 100644
> > Documentation/devicetree/bindings/input/gpio-vibrator.txt
> >
> > diff --git a/Documentation/devicetree/bindings/input/gpio-vibrator.txt
> > b/Documentation/devicetree/bindings/input/gpio-vibrator.txt new file mode
> > 100644
> > index ..93e5a8e7622d
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/input/gpio-vibrator.txt
> > @@ -0,0 +1,20 @@
> > +* GPIO vibrator device tree bindings
> > +
> > +Registers a GPIO device as vibrator, where the vibration motor just has the
> > +capability to turn on or off. If the device is connected to a pwm, you
> > should +use the pwm-vibrator driver instead.
> > +
> > +Required properties:
> > +- compatible: should contain "gpio-vibrator"
> > +- enable-gpios: Should contain a GPIO handle
> > +
> > +Optional properties:
> > +- vcc-supply: Phandle for the regulator supplying power
> > +
> > +Example from Fairphone 2:
> > +
> > +vibrator {
> > + compatible = "gpio-vibrator";
> > + enable-gpios = < 86 GPIO_ACTIVE_HIGH>;
> > + vcc-supply = <_l18>;
> > +};
>
> I see that the yaml based device tree binding docs seem to be the new hotness?
> Is there any "policy" / preference about new drivers?

Not required yet, but welcomed. It's still a trickle so we can work
out any issues and in some cases the common bindings still need to be
done. I'm starting to ask subsystem maintainers to require DT schemas
though.

This one looks straightforward to use the schema.

Rob


Re: [PATCH v2 02/15] [media] mtk-mipicsi: add mediatek mipicsi driver for mt2712

2019-04-17 Thread CK Hu
Hi, Stu:

On Tue, 2019-04-16 at 17:30 +0800, Stu Hsieh wrote:
> This patch add mediatek mipicsi driver for mt2712,
> including probe function to get the value from device tree,
> and register to v4l2 the host device.
> 
> Signed-off-by: Stu Hsieh 
> ---
>  drivers/media/platform/mtk-mipicsi/Makefile   |   4 +
>  .../media/platform/mtk-mipicsi/mtk_mipicsi.c  | 767 ++
>  2 files changed, 771 insertions(+)
>  create mode 100644 drivers/media/platform/mtk-mipicsi/Makefile
>  create mode 100644 drivers/media/platform/mtk-mipicsi/mtk_mipicsi.c
> 
> diff --git a/drivers/media/platform/mtk-mipicsi/Makefile 
> b/drivers/media/platform/mtk-mipicsi/Makefile
> new file mode 100644
> index ..326a5e3808fa
> --- /dev/null
> +++ b/drivers/media/platform/mtk-mipicsi/Makefile
> @@ -0,0 +1,4 @@
> +mtk-mipicsi-y += mtk_mipicsi.o
> +
> +obj-$(CONFIG_VIDEO_MEDIATEK_MIPICSI) += mtk-mipicsi.o
> +
> diff --git a/drivers/media/platform/mtk-mipicsi/mtk_mipicsi.c 
> b/drivers/media/platform/mtk-mipicsi/mtk_mipicsi.c
> new file mode 100644
> index ..e26bebe17fe5
> --- /dev/null
> +++ b/drivers/media/platform/mtk-mipicsi/mtk_mipicsi.c
> @@ -0,0 +1,767 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2017 MediaTek Inc.
> + * Author: Ricky Zhang 
> + * Baoyin Zhang 
> + * Alan Yue 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * http://www.gnu.org/licenses/gpl-2.0.html for more details.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#ifdef CONFIG_VB2_MEDIATEK_DMA_SG
> +#include "mtkbuf-dma-cache-sg.h"
> +#endif
> +
> +#define MTK_MIPICSI_DRV_NAME "mtk-mipicsi"
> +#define MTK_PLATFORM_STR "platform:mt2712"
> +#define MIPICSI_COMMON_CLK 2
> +#define MTK_CAMDMA_MAX_NUM 4U
> +#define MIPICSI_CLK (MIPICSI_COMMON_CLK + MTK_CAMDMA_MAX_NUM)
> +#define MTK_DATAWIDTH_8  (0x01U << 7U)
> +#define MAX_SUPPORT_WIDTH 4096U
> +#define MAX_SUPPORT_HEIGHT4096U
> +#define MAX_BUFFER_NUM   32U
> +#define VID_LIMIT_BYTES  (100U * 1024U * 1024U)
> +
> +/* buffer for one video frame */
> +struct mtk_mipicsi_buf {
> + struct list_head queue;
> + struct vb2_buffer *vb;
> + dma_addr_t vb_dma_addr_phy;
> + int prepare_flag;
> +};
> +
> +struct mtk_mipicsi_dev {
> + struct soc_camera_host  soc_host;
> + struct platform_device *pdev;
> + unsigned int camsv_num;
> + struct v4l2_device  v4l2_dev;
> + struct device *larb_pdev;
> + void __iomem*ana;
> + void __iomem*seninf_ctrl;

Separating register control to another patch looks strange to me.
Register control is the bottom part and this patch is the top part. You
send a top part first then the bottom part. 'seninf_ctrl' is useless in
this patch, you may move this to the patch that use this variable or
merge that patch into this patch.

Regards,
CK

> + void __iomem*seninf;
> + struct regmap   *seninf_top;
> + void __iomem*seninf_mux[MTK_CAMDMA_MAX_NUM];
> + void __iomem*camsv[MTK_CAMDMA_MAX_NUM];
> + const struct soc_camera_format_xlate *current_fmt;
> + u16 width_flags;/* max 12 bits */
> + struct list_headcapture_list[MTK_CAMDMA_MAX_NUM];
> + struct list_headfb_list;
> + spinlock_t  lock;
> + spinlock_t  queue_lock;
> + struct mtk_mipicsi_buf  cam_buf[MAX_BUFFER_NUM];
> + bool streamon;
> + unsigned long frame_cnt[MTK_CAMDMA_MAX_NUM];
> + unsigned int link;
> + unsigned long enqueue_cnt;
> + unsigned long dequeue_cnt;
> + struct v4l2_ctrl_handler ctrl_hdl;
> + char drv_name[16];
> + u32 id;
> + int clk_num;
> + struct clk  *clk[MIPICSI_CLK];
> +};
> +



Re: [alsa-devel] [PATCH -next] ASoC: Intel: Haswell: Remove set but not used variable 'stage_type'

2019-04-17 Thread YueHaibing
On 2019/4/18 1:41, Pierre-Louis Bossart wrote:
> On 4/17/19 10:11 AM, Yue Haibing wrote:
>> From: YueHaibing 
>>
>> Fixes gcc '-Wunused-but-set-variable' warning:
>>
>> sound/soc/intel/haswell/sst-haswell-ipc.c: In function 'hsw_stream_message':
>> sound/soc/intel/haswell/sst-haswell-ipc.c:669:29: warning: variable 
>> 'stage_type' set but not used [-Wunused-but-set-variable]
>>
>> It is never used since introduction in
>> commit ba57f68235cf ("ASoC: Intel: create haswell folder and move haswell 
>> platform files in")
> 
> Thanks for the patch.
> 
> I think you could go further in the cleanup and remove both the variable 
> declaration and the static inline mst_get_stage_type that will also be 
> useless as a consequence of this assignment removal.

You are right.
> 
> Care to send a v2?

Sure, will do that,thanks!
> 
>>
>> Signed-off-by: YueHaibing 
>> ---
>>   sound/soc/intel/haswell/sst-haswell-ipc.c | 1 -
>>   1 file changed, 1 deletion(-)
>>
>> diff --git a/sound/soc/intel/haswell/sst-haswell-ipc.c 
>> b/sound/soc/intel/haswell/sst-haswell-ipc.c
>> index 31fcdf12..4d3de99 100644
>> --- a/sound/soc/intel/haswell/sst-haswell-ipc.c
>> +++ b/sound/soc/intel/haswell/sst-haswell-ipc.c
>> @@ -672,7 +672,6 @@ static int hsw_stream_message(struct sst_hsw *hsw, u32 
>> header)
>> stream_msg = msg_get_stream_type(header);
>>   stream_id = msg_get_stream_id(header);
>> -stage_type = msg_get_stage_type(header);
>> stream = get_stream_by_id(hsw, stream_id);
>>   if (stream == NULL)
>>
> 
> 
> .
> 



RE: Issues with i.MX SPI DMA transfers

2019-04-17 Thread Robin Gong
Hi Igor,
Did you meet any issue with my latest patch?

> -Original Message-
> From: Robin Gong
> Sent: April 9, 2019 11:26> 
> Hi Igor,
>   Please have a try with the attached patches, and revert 25aaa75df1e6,
> ad0d92d7ba6a , dd4b487b32a3, df07101e1c4a before apply. Besides XCH, tx
> thresh should be set to 0 , now no failure caught on ecspi5.
> 


Зарулю ваш сайт в top 4.

2019-04-17 Thread Александр Петров
Я знаю как стать первым номером в росте позиций сайтов.
Вы покаместь не в top=1? 
Я эксперт в продвижении сео уже девять лет. Есть перворазрядное портфолио и 
опыт.
Умудрюсь просунуть в google поиск.
Первостепенное с чем могу помочь это аудит.
Следом исправить что необходимо.
И в конце протолкнуть в top 7.
Если вам это требуется отпишитесь.
Портфолио по пожеланию.
Помесячные отчеты.
Обнаружьте о своем сайте всё, жду ответа.
Ноль шесть 7 три один пять сем два 0 8

-- реклама ---
Поторопись зарегистрировать самый короткий почтовый адрес @i.ua
https://mail.i.ua/reg - и получи 1Gb для хранения писем


Re: [greybus-dev] [PATCH v2] Staging: greybus: Cleanup in greybus driver

2019-04-17 Thread Madhumitha Prabakaran
On 04/17  :41, Alex Elder wrote:
> On 4/16/19 5:13 PM, Madhumitha Prabakaran wrote:
> > Fix a blank line after structure declarations. Also, convert
> > macros into inline functions in order to maintain Linux kernel
> > coding style based on which the inline function is
> > preferable over the macro.
> 
> Madhumitha, here is my explanation for why *not* to convert these
> container_of() macros to inline functions.  It's not just because
> "we do this all over the kernel."  The reason is that it actually
> does not improve the code.
> 
> Inline functions are often better than macros because they are
> explicit about types, allowing the compiler to tell you if you
> are passing parameters of the right type, and possibly assigning
> results to objects of the right type.


This makes more sense now.

> 
> Here is the definition of the container_of() macro in :
> 
> #define container_of(ptr, type, member) ({  \
> const typeof(((type *)0)->member) * __mptr = (ptr); \
> (type *)((char *)__mptr - offsetof(type, member)); })
> 
> You see that ptr is explicitly assigned to a local variable
> of the type of the member field, so the compiler is able to
> check that assignment.  And the return value is similarly
> cast to the type of the containing structure, so the type
> compatibility of the assignment can also be checked.  It is
> assumed that where this macro is used, the caller knows it
> is passing an appropriate address.
> 
> There is another thing about this particular definition make
> it just as good as an inline function.  A macro expansion can
> result in one of its parameters being used more than once, and
> that allows those parameters to be *evaluated* more than once
> in a single statement, which can produce incorrect code.
> 
> This macro is defined using the "statement expression"
> extension to C--where a compound statement is enclosed in
> parentheses--({ ... }).  This allows a local variable to be
> used in the macro expansion, which avoids any reuse of the
> macro parameters which might cause side-effects.
> 
> So anyway, I don't think there is any benefit to replacing
> macros like this that do container_of() with inline functions.
> 
>   -Alex

Thanks for taking time to explain it in detailed way.

> 
> > Blank line fixes are suggested by checkpatch.pl
> > 
> > Signed-off-by: Madhumitha Prabakaran 
> > 
> > Changes in v2:
> > - To maintain consistency in driver greybus, all the instances of macro
> > with container_of are fixed in a single patch.
> > ---
> >  drivers/staging/greybus/bundle.h|  6 +-
> >  drivers/staging/greybus/control.h   |  6 +-
> >  drivers/staging/greybus/gbphy.h | 12 ++--
> >  drivers/staging/greybus/greybus.h   |  6 +-
> >  drivers/staging/greybus/hd.h|  6 +-
> >  drivers/staging/greybus/interface.h |  6 +-
> >  drivers/staging/greybus/module.h|  6 +-
> >  drivers/staging/greybus/svc.h   |  6 +-
> >  8 files changed, 45 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/staging/greybus/bundle.h 
> > b/drivers/staging/greybus/bundle.h
> > index 8734d2055657..84956eedb1c4 100644
> > --- a/drivers/staging/greybus/bundle.h
> > +++ b/drivers/staging/greybus/bundle.h
> > @@ -31,7 +31,11 @@ struct gb_bundle {
> >  
> > struct list_headlinks;  /* interface->bundles */
> >  };
> > -#define to_gb_bundle(d) container_of(d, struct gb_bundle, dev)
> > +
> > +static inline struct gb_bundle *to_gb_bundle(struct device *d)
> > +{
> > +   return container_of(d, struct gb_bundle, dev);
> > +}
> >  
> >  /* Greybus "private" definitions" */
> >  struct gb_bundle *gb_bundle_create(struct gb_interface *intf, u8 bundle_id,
> > diff --git a/drivers/staging/greybus/control.h 
> > b/drivers/staging/greybus/control.h
> > index 3a29ec05f631..a681ef74e7fe 100644
> > --- a/drivers/staging/greybus/control.h
> > +++ b/drivers/staging/greybus/control.h
> > @@ -24,7 +24,11 @@ struct gb_control {
> > char *vendor_string;
> > char *product_string;
> >  };
> > -#define to_gb_control(d) container_of(d, struct gb_control, dev)
> > +
> > +static inline struct gb_control *to_gb_control(struct device *d)
> > +{
> > +   return container_of(d, struct gb_control, dev);
> > +}
> >  
> >  struct gb_control *gb_control_create(struct gb_interface *intf);
> >  int gb_control_enable(struct gb_control *control);
> > diff --git a/drivers/staging/greybus/gbphy.h 
> > b/drivers/staging/greybus/gbphy.h
> > index 99463489d7d6..20307f6dfcb9 100644
> > --- a/drivers/staging/greybus/gbphy.h
> > +++ b/drivers/staging/greybus/gbphy.h
> > @@ -15,7 +15,11 @@ struct gbphy_device {
> > struct list_head list;
> > struct device dev;
> >  };
> > -#define to_gbphy_dev(d) container_of(d, struct gbphy_device, dev)
> > +
> > +static inline struct gbphy_device *to_gbphy_dev(struct device *d)
> > +{
> > +   return container_of(d, struct gbphy_device, dev);
> > +}
> >  
> >  

Re: linux-next: build warning after merge of the block tree

2019-04-17 Thread Chao Yu
On 2019/4/17 22:03, Jaegeuk Kim wrote:
> On 04/17, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2019/4/17 10:31, Stephen Rothwell wrote:
>>> Hi all,
>>>
>>> After merging the block tree, today's linux-next build (x86_64
>>> allmodconfig) produced this warning:
>>>
>>> fs/f2fs/node.c: In function 'f2fs_remove_inode_page':
>>> fs/f2fs/node.c:1193:47: warning: format '%zu' expects argument of type 
>>> 'size_t', but argument 5 has type 'blkcnt_t' {aka 'long long unsigned int'} 
>>> [-Wformat=]
>>> "Inconsistent i_blocks, ino:%lu, iblocks:%zu",
>>>  ~~^
>>>  %llu
>>> inode->i_ino, inode->i_blocks);
>>>   ~~~   
>>
>> Could you please help to fix that as below in your dev branch directly?
>>
>> "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
> 
> We can just use "%lu"?

We'd better follow sample in Documentation/core-api/printk-formats.rst:

If  is dependent on a config option for its size (e.g., sector_t,
blkcnt_t) or is architecture-dependent for its size (e.g., tcflag_t), use a
format specifier of its largest possible type and explicitly cast to it.

Example::

printk("test: sector number/total blocks: %llu/%llu\n",
(unsigned long long)sector, (unsigned long long)blockcount);

Thanks,

> 
>> inode->i_ino, (unsigned long long)inode->i_blocks)
>>
>>
>> It looks that we need to fix below commits as well:
>>
>> f2fs: fix to avoid panic in dec_valid_block_count()
>> f2fs: fix to avoid panic in dec_valid_node_count()
>>
>> Thanks,
>>
>>>
>>> Introduced by commit
>>>
>>>   90ae238d9dac ("f2fs: fix to avoid panic in f2fs_remove_inode_page()")
>>>
>>> from the f2fs tree interacting with commit
>>>
>>>   72deb455b5ec ("block: remove CONFIG_LBDAF")
>>>
> .
> 


[PATCH] RISC-V: redefine PTRS_PER_PGD/PTRS_PER_PMD/PTRS_PER_PTE

2019-04-17 Thread damon
Use the number of addresses to define the relevant macros.

Signed-off-by: damon 
---
 arch/riscv/include/asm/pgtable-32.h | 2 ++
 arch/riscv/include/asm/pgtable-64.h | 3 ++-
 arch/riscv/include/asm/pgtable.h| 4 ++--
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/include/asm/pgtable-32.h 
b/arch/riscv/include/asm/pgtable-32.h
index d61974b7..93607f6 100644
--- a/arch/riscv/include/asm/pgtable-32.h
+++ b/arch/riscv/include/asm/pgtable-32.h
@@ -17,8 +17,10 @@
 #include 
 #include 
 
+#define MAX_USER_VA_BITS   32
 /* Size of region mapped by a page global directory */
 #define PGDIR_SHIFT 22
+#define PMD_SHIFT   PGDIR_SHIFT
 #define PGDIR_SIZE  (_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK  (~(PGDIR_SIZE - 1))
 
diff --git a/arch/riscv/include/asm/pgtable-64.h 
b/arch/riscv/include/asm/pgtable-64.h
index 7aa0ea9..a56d4d0 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -16,6 +16,7 @@
 
 #include 
 
+#define MAX_USER_VA_BITS   39
 #define PGDIR_SHIFT 30
 /* Size of region mapped by a page global directory */
 #define PGDIR_SIZE  (_AC(1, UL) << PGDIR_SHIFT)
@@ -34,7 +35,7 @@
 #define pmd_val(x)  ((x).pmd)
 #define __pmd(x)((pmd_t) { (x) })
 
-#define PTRS_PER_PMD(PAGE_SIZE / sizeof(pmd_t))
+#define PTRS_PER_PMD(1 << (PGDIR_SHIFT - PMD_SHIFT))
 
 static inline int pud_present(pud_t pud)
 {
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 1141364..d9cb3c8 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -33,9 +33,9 @@
 #endif /* CONFIG_64BIT */
 
 /* Number of entries in the page global directory */
-#define PTRS_PER_PGD(PAGE_SIZE / sizeof(pgd_t))
+#define PTRS_PER_PGD (1 << (MAX_USER_VA_BITS - PGDIR_SHIFT))
 /* Number of entries in the page table */
-#define PTRS_PER_PTE(PAGE_SIZE / sizeof(pte_t))
+#define PTRS_PER_PTE   (1 << (PMD_SHIFT - PAGE_SHIFT))
 
 /* Number of PGD entries that a user-mode program can use */
 #define USER_PTRS_PER_PGD   (TASK_SIZE / PGDIR_SIZE)
-- 
1.9.1



Re: [PATCH v2] Staging: greybus: Cleanup in greybus driver

2019-04-17 Thread Madhumitha Prabakaran
On 04/17  :25, Greg KH wrote:
> On Tue, Apr 16, 2019 at 05:13:18PM -0500, Madhumitha Prabakaran wrote:
> > Fix a blank line after structure declarations. Also, convert
> > macros into inline functions in order to maintain Linux kernel
> > coding style based on which the inline function is
> > preferable over the macro.
> > 
> > Blank line fixes are suggested by checkpatch.pl
> > 
> > Signed-off-by: Madhumitha Prabakaran 
> > 
> > Changes in v2:
> > - To maintain consistency in driver greybus, all the instances of macro
> > with container_of are fixed in a single patch.
> > ---
> >  drivers/staging/greybus/bundle.h|  6 +-
> >  drivers/staging/greybus/control.h   |  6 +-
> >  drivers/staging/greybus/gbphy.h | 12 ++--
> >  drivers/staging/greybus/greybus.h   |  6 +-
> >  drivers/staging/greybus/hd.h|  6 +-
> >  drivers/staging/greybus/interface.h |  6 +-
> >  drivers/staging/greybus/module.h|  6 +-
> >  drivers/staging/greybus/svc.h   |  6 +-
> >  8 files changed, 45 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/staging/greybus/bundle.h 
> > b/drivers/staging/greybus/bundle.h
> > index 8734d2055657..84956eedb1c4 100644
> > --- a/drivers/staging/greybus/bundle.h
> > +++ b/drivers/staging/greybus/bundle.h
> > @@ -31,7 +31,11 @@ struct gb_bundle {
> >  
> > struct list_headlinks;  /* interface->bundles */
> >  };
> > -#define to_gb_bundle(d) container_of(d, struct gb_bundle, dev)
> > +
> > +static inline struct gb_bundle *to_gb_bundle(struct device *d)
> > +{
> > +   return container_of(d, struct gb_bundle, dev);
> > +}
> 
> Why are we changing this to an inline function?  The #define is fine,
> and how we "normally" do this type of container_of conversion.
> 
> I understand the objection of the "no blank line", but this was the
> "original" style that we used to create these #defines from the very
> beginning of the driver model work a decade ago.  Changing that
> muscle-memory is going to be hard for some of us.  Look at
> drivers/base/base.h for other examples of this.

Thanks for explaining it.

> 
> thanks,
> 
> greg k-h


Re: [PATCH v8 04/16] sched/core: uclamp: Add system default clamps

2019-04-17 Thread Suren Baghdasaryan
On Tue, Apr 2, 2019 at 3:42 AM Patrick Bellasi  wrote:
>
> Tasks without a user-defined clamp value are considered not clamped
> and by default their utilization can have any value in the
> [0..SCHED_CAPACITY_SCALE] range.
>
> Tasks with a user-defined clamp value are allowed to request any value
> in that range, and the required clamp is unconditionally enforced.
> However, a "System Management Software" could be interested in limiting
> the range of clamp values allowed for all tasks.
>
> Add a privileged interface to define a system default configuration via:
>
>   /proc/sys/kernel/sched_uclamp_util_{min,max}
>
> which works as an unconditional clamp range restriction for all tasks.
>
> With the default configuration, the full SCHED_CAPACITY_SCALE range of
> values is allowed for each clamp index. Otherwise, the task-specific
> clamp is capped by the corresponding system default value.
>
> Do that by tracking, for each task, the "effective" clamp value and
> bucket the task has been refcounted in at enqueue time. This
> allows to lazy aggregate "requested" and "system default" values at
> enqueue time and simplifies refcounting updates at dequeue time.
>
> The cached bucket ids are used to avoid (relatively) more expensive
> integer divisions every time a task is enqueued.
>
> An active flag is used to report when the "effective" value is valid and
> thus the task is actually refcounted in the corresponding rq's bucket.
>
> Signed-off-by: Patrick Bellasi 
> Cc: Ingo Molnar 
> Cc: Peter Zijlstra 
>
> ---
> Changes in v8:
>  Message-ID: <20190313201010.gu2...@worktop.programming.kicks-ass.net>
>  - add "requested" values uclamp_se instance beside the existing
>"effective" values instance
>  - rename uclamp_effective_{get,assign}() into uclamp_eff_{get,set}()
>  - make uclamp_eff_get() return the new "effective" values by copy
> Message-ID: <20190318125844.ajhjpaqlcgxn7qkq@e110439-lin>
>  - run uclamp_fork() code independently from the class being supported.
>Resetting active flag is not harmful and following patches will add
>other code which still needs to be executed independently from class
>support.
> Message-ID: <20190313201342.gv2...@worktop.programming.kicks-ass.net>
>  - add sysctl_sched_uclamp_handler()'s internal mutex to serialize
>concurrent usages
> ---
>  include/linux/sched.h|  10 +++
>  include/linux/sched/sysctl.h |  11 +++
>  kernel/sched/core.c  | 131 ++-
>  kernel/sysctl.c  |  16 +
>  4 files changed, 167 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 0c0dd7aac8e9..d8491954e2e1 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -584,14 +584,21 @@ struct sched_dl_entity {
>   * Utilization clamp for a scheduling entity
>   * @value: clamp value "assigned" to a se
>   * @bucket_id: bucket index corresponding to the "assigned" value
> + * @active:the se is currently refcounted in a rq's bucket
>   *
>   * The bucket_id is the index of the clamp bucket matching the clamp value
>   * which is pre-computed and stored to avoid expensive integer divisions from
>   * the fast path.
> + *
> + * The active bit is set whenever a task has got an "effective" value 
> assigned,
> + * which can be different from the clamp value "requested" from user-space.
> + * This allows to know a task is refcounted in the rq's bucket corresponding
> + * to the "effective" bucket_id.
>   */
>  struct uclamp_se {
> unsigned int value  : bits_per(SCHED_CAPACITY_SCALE);
> unsigned int bucket_id  : bits_per(UCLAMP_BUCKETS);
> +   unsigned int active : 1;
>  };
>  #endif /* CONFIG_UCLAMP_TASK */
>
> @@ -676,6 +683,9 @@ struct task_struct {
> struct sched_dl_entity  dl;
>
>  #ifdef CONFIG_UCLAMP_TASK
> +   /* Clamp values requested for a scheduling entity */
> +   struct uclamp_seuclamp_req[UCLAMP_CNT];
> +   /* Effective clamp values used for a scheduling entity */
> struct uclamp_seuclamp[UCLAMP_CNT];
>  #endif
>
> diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
> index 99ce6d728df7..d4f6215ee03f 100644
> --- a/include/linux/sched/sysctl.h
> +++ b/include/linux/sched/sysctl.h
> @@ -56,6 +56,11 @@ int sched_proc_update_handler(struct ctl_table *table, int 
> write,
>  extern unsigned int sysctl_sched_rt_period;
>  extern int sysctl_sched_rt_runtime;
>
> +#ifdef CONFIG_UCLAMP_TASK
> +extern unsigned int sysctl_sched_uclamp_util_min;
> +extern unsigned int sysctl_sched_uclamp_util_max;
> +#endif
> +
>  #ifdef CONFIG_CFS_BANDWIDTH
>  extern unsigned int sysctl_sched_cfs_bandwidth_slice;
>  #endif
> @@ -75,6 +80,12 @@ extern int sched_rt_handler(struct ctl_table *table, int 
> write,
> void __user *buffer, size_t *lenp,
> loff_t *ppos);
>
> +#ifdef 

Re: [PATCH 4/5] mm: rework non-root kmem_cache lifecycle management

2019-04-17 Thread Roman Gushchin
On Wed, Apr 17, 2019 at 04:41:01PM -0700, Shakeel Butt wrote:
> On Wed, Apr 17, 2019 at 2:55 PM Roman Gushchin  wrote:
> >
> > This commit makes several important changes in the lifecycle
> > of a non-root kmem_cache, which also affect the lifecycle
> > of a memory cgroup.
> >
> > Currently each charged slab page has a page->mem_cgroup pointer
> > to the memory cgroup and holds a reference to it.
> > Kmem_caches are held by the cgroup. On offlining empty kmem_caches
> > are freed, all other are freed on cgroup release.
> 
> No, they are not freed (i.e. destroyed) on offlining, only
> deactivated. All memcg kmem_caches are freed/destroyed on memcg's
> css_free.

You're right, my bad. I was thinking about the corresponding sysfs entry
when was writing it. We try to free it from the deactivation path too.

> 
> >
> > So the current scheme can be illustrated as:
> > page->mem_cgroup->kmem_cache.
> >
> > To implement the slab memory reparenting we need to invert the scheme
> > into: page->kmem_cache->mem_cgroup.
> >
> > Let's make every page to hold a reference to the kmem_cache (we
> > already have a stable pointer), and make kmem_caches to hold a single
> > reference to the memory cgroup.
> 
> What about memcg_kmem_get_cache()? That function assumes that by
> taking reference on memcg, it's kmem_caches will stay. I think you
> need to get reference on the kmem_cache in memcg_kmem_get_cache()
> within the rcu lock where you get the memcg through css_tryget_online.

Yeah, a very good question.

I believe it's safe because css_tryget_online() guarantees that
the cgroup is online and won't go offline before css_free() in
slab_post_alloc_hook(). I do initialize kmem_cache's refcount to 1
and drop it on offlining, so it protects the online kmem_cache.

Thank you for looking into the patchset!


Re: kernel BUG at kernel/cred.c:434!

2019-04-17 Thread Casey Schaufler

On 4/17/2019 4:39 PM, Paul Moore wrote:

On Wed, Apr 17, 2019 at 12:27 PM Oleg Nesterov  wrote:

On 04/17, Paul Moore wrote:

On Wed, Apr 17, 2019 at 10:57 AM Oleg Nesterov  wrote:

On 04/17, Paul Moore wrote:

I'm tempted to simply return an error in selinux_setprocattr() if
the task's credentials are not the same as its real_cred;

What about other modules? I have no idea what smack_setprocattr() is,
but it too does prepare_creds/commit creds.

it seems that the simplest workaround should simply add the additional
cred == real_cred into proc_pid_attr_write().

Yes, that is simple, but I worry about what other LSMs might want to
do.  While I believe failing if the effective creds are not the same
as the real_creds is okay for SELinux (possibly Smack too), I worry
about what other LSMs may want to do.  After all,
proc_pid_attr_write() doesn't change the the creds itself, that is
something the specific LSMs do.

Yes, but if proc_pid_attr_write() is called with cred != real_cred then
something is already wrong?

True, or at least I would think so.

Looking at the current tree there are three LSMs which implement
setprocattr hooks: SELinux, Smack, and AppArmor.  I know Casey has
already mentioned that he wasn't able to trigger the problem in Smack,
but looking at smack_setprocattr() I see the similar commit_creds()
usage so I would expect the same problem in Smack; what say you Casey?


I say that my test program runs without ill effect. I call acct()
with "/proc/self/attr/current", which succeeds and enables accounting
just like it is supposed to. I then have the program open
"/proc/self/attr/current" and read it, all of which goes swimmingly.
When Smack frees a cred it usually does not free any memory of its
own, so it is conceivable that I'm just getting lucky. Or, I may not
have sufficient debug enabled.


  Looking at apparmor_setprocattr(), it appears that it too could end
up calling commit_creds() via aa_set_current_hat().

Since it looks like all three LSMs which implement the setprocattr
hook are vulnerable I'm open to the idea that proc_pid_attr_write() is
a better choice for the cred != read_cred check, but I would want to
make sure John and Casey are okay with that.

John?

Casey?


I'm fine with the change going into proc_pid_attr_write().



Re: [PATCH v3 06/16] PM / devfreq: tegra: Drop primary interrupt handler

2019-04-17 Thread Chanwoo Choi
On 19. 4. 18. 오전 7:29, Dmitry Osipenko wrote:
> There is no real need in the primary interrupt handler, hence move
> everything to the secondary (threaded) handler. In a result locking
> is consistent now and there are no potential races with the interrupt
> handler because it is protected with the devfreq's mutex.
> 
> Signed-off-by: Dmitry Osipenko 
> ---
>  drivers/devfreq/tegra-devfreq.c | 55 +++--
>  1 file changed, 18 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c
> index 24ec65556c39..b65313fe3c2e 100644
> --- a/drivers/devfreq/tegra-devfreq.c
> +++ b/drivers/devfreq/tegra-devfreq.c
> @@ -144,7 +144,6 @@ static struct tegra_devfreq_device_config 
> actmon_device_configs[] = {
>  struct tegra_devfreq_device {
>   const struct tegra_devfreq_device_config *config;
>   void __iomem *regs;
> - spinlock_t lock;
>  
>   /* Average event count sampled in the last interrupt */
>   u32 avg_count;
> @@ -249,11 +248,8 @@ static void actmon_write_barrier(struct tegra_devfreq 
> *tegra)
>  static void actmon_isr_device(struct tegra_devfreq *tegra,
> struct tegra_devfreq_device *dev)
>  {
> - unsigned long flags;
>   u32 intr_status, dev_ctrl;
>  
> - spin_lock_irqsave(>lock, flags);
> -
>   dev->avg_count = device_readl(dev, ACTMON_DEV_AVG_COUNT);
>   tegra_devfreq_update_avg_wmark(tegra, dev);
>  
> @@ -302,26 +298,6 @@ static void actmon_isr_device(struct tegra_devfreq 
> *tegra,
>   device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS);
>  
>   actmon_write_barrier(tegra);
> -
> - spin_unlock_irqrestore(>lock, flags);
> -}
> -
> -static irqreturn_t actmon_isr(int irq, void *data)
> -{
> - struct tegra_devfreq *tegra = data;
> - bool handled = false;
> - unsigned int i;
> - u32 val;
> -
> - val = actmon_readl(tegra, ACTMON_GLB_STATUS);
> - for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) {
> - if (val & tegra->devices[i].config->irq_mask) {
> - actmon_isr_device(tegra, tegra->devices + i);
> - handled = true;
> - }
> - }
> -
> - return handled ? IRQ_WAKE_THREAD : IRQ_NONE;
>  }
>  
>  static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra,
> @@ -348,15 +324,12 @@ static void actmon_update_target(struct tegra_devfreq 
> *tegra,
>   unsigned long cpu_freq = 0;
>   unsigned long static_cpu_emc_freq = 0;
>   unsigned int avg_sustain_coef;
> - unsigned long flags;
>  
>   if (dev->config->avg_dependency_threshold) {
>   cpu_freq = cpufreq_get(0);
>   static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq);
>   }
>  
> - spin_lock_irqsave(>lock, flags);
> -
>   dev->target_freq = dev->avg_count / ACTMON_SAMPLING_PERIOD;
>   avg_sustain_coef = 100 * 100 / dev->config->boost_up_threshold;
>   dev->target_freq = do_percent(dev->target_freq, avg_sustain_coef);
> @@ -364,19 +337,31 @@ static void actmon_update_target(struct tegra_devfreq 
> *tegra,
>  
>   if (dev->avg_count >= dev->config->avg_dependency_threshold)
>   dev->target_freq = max(dev->target_freq, static_cpu_emc_freq);
> -
> - spin_unlock_irqrestore(>lock, flags);
>  }
>  
>  static irqreturn_t actmon_thread_isr(int irq, void *data)
>  {
>   struct tegra_devfreq *tegra = data;
> + bool handled = false;
> + unsigned int i;
> + u32 val;
>  
>   mutex_lock(>devfreq->lock);
> - update_devfreq(tegra->devfreq);
> +
> + val = actmon_readl(tegra, ACTMON_GLB_STATUS);
> + for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) {
> + if (val & tegra->devices[i].config->irq_mask) {
> + actmon_isr_device(tegra, tegra->devices + i);
> + handled = true;
> + }
> + }
> +
> + if (handled)
> + update_devfreq(tegra->devfreq);
> +
>   mutex_unlock(>devfreq->lock);
>  
> - return IRQ_HANDLED;
> + return handled ? IRQ_HANDLED : IRQ_NONE;
>  }
>  
>  static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
> @@ -386,7 +371,6 @@ static int tegra_actmon_rate_notify_cb(struct 
> notifier_block *nb,
>   struct tegra_devfreq *tegra;
>   struct tegra_devfreq_device *dev;
>   unsigned int i;
> - unsigned long flags;
>  
>   if (action != POST_RATE_CHANGE)
>   return NOTIFY_OK;
> @@ -398,9 +382,7 @@ static int tegra_actmon_rate_notify_cb(struct 
> notifier_block *nb,
>   for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) {
>   dev = >devices[i];
>  
> - spin_lock_irqsave(>lock, flags);
>   tegra_devfreq_update_wmark(tegra, dev);
> - spin_unlock_irqrestore(>lock, flags);
>   }
>  
>   actmon_write_barrier(tegra);
> @@ -682,7 +664,6 @@ static int tegra_devfreq_probe(struct platform_device 

Re: [PATCH v3 04/16] PM / devfreq: tegra: Don't ignore clk errors

2019-04-17 Thread Chanwoo Choi
On 19. 4. 18. 오전 7:29, Dmitry Osipenko wrote:
> The clk_set_min_rate() could fail and in this case clk_set_rate() sets
> rate to 0, which may drop EMC rate to minimum and make machine very
> difficult to use.
> 
> Signed-off-by: Dmitry Osipenko 
> ---
>  drivers/devfreq/tegra-devfreq.c | 16 ++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c
> index 7d7b9a5895b2..c7428c5eee23 100644
> --- a/drivers/devfreq/tegra-devfreq.c
> +++ b/drivers/devfreq/tegra-devfreq.c
> @@ -484,8 +484,10 @@ static int tegra_devfreq_target(struct device *dev, 
> unsigned long *freq,
>   u32 flags)
>  {
>   struct tegra_devfreq *tegra = dev_get_drvdata(dev);
> + struct devfreq *devfreq = tegra->devfreq;
>   struct dev_pm_opp *opp;
>   unsigned long rate;
> + int err;
>  
>   opp = devfreq_recommended_opp(dev, freq, flags);
>   if (IS_ERR(opp)) {
> @@ -495,10 +497,20 @@ static int tegra_devfreq_target(struct device *dev, 
> unsigned long *freq,
>   rate = dev_pm_opp_get_freq(opp);
>   dev_pm_opp_put(opp);
>  
> - clk_set_min_rate(tegra->emc_clock, rate);
> - clk_set_rate(tegra->emc_clock, 0);
> + err = clk_set_min_rate(tegra->emc_clock, rate);
> + if (err)
> + return err;
> +
> + err = clk_set_rate(tegra->emc_clock, 0);
> + if (err)
> + goto restore_min_rate;
>  
>   return 0;
> +
> +restore_min_rate:
> + clk_set_min_rate(tegra->emc_clock, devfreq->previous_freq);
> +
> + return err;
>  }
>  
>  static int tegra_devfreq_get_dev_status(struct device *dev,
> 

Reviewed-by: Chanwoo Choi 

-- 
Best Regards,
Chanwoo Choi
Samsung Electronics


Re: [PATCH v3 15/16] PM / devfreq: tegra: Rename tegra-devfreq.c to tegra30-devfreq.c

2019-04-17 Thread Chanwoo Choi
On 19. 4. 18. 오전 7:29, Dmitry Osipenko wrote:
> In order to reflect that driver serves NVIDIA Tegra30 and later SoC
> generations, let's rename the driver's source file to "tegra30-devfreq.c".
> This will make driver files to look more consistent after addition of a
> driver for Tegra20.
> 
> Signed-off-by: Dmitry Osipenko 
> ---
>  drivers/devfreq/Makefile   | 2 +-
>  drivers/devfreq/{tegra-devfreq.c => tegra30-devfreq.c} | 0
>  2 files changed, 1 insertion(+), 1 deletion(-)
>  rename drivers/devfreq/{tegra-devfreq.c => tegra30-devfreq.c} (100%)
> 
> diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
> index 32b8d4d3f12c..47e5aeeebfd1 100644
> --- a/drivers/devfreq/Makefile
> +++ b/drivers/devfreq/Makefile
> @@ -10,7 +10,7 @@ obj-$(CONFIG_DEVFREQ_GOV_PASSIVE)   += governor_passive.o
>  # DEVFREQ Drivers
>  obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o
>  obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o
> -obj-$(CONFIG_ARM_TEGRA_DEVFREQ)  += tegra-devfreq.o
> +obj-$(CONFIG_ARM_TEGRA_DEVFREQ)  += tegra30-devfreq.o
>  
>  # DEVFREQ Event Drivers
>  obj-$(CONFIG_PM_DEVFREQ_EVENT)   += event/
> diff --git a/drivers/devfreq/tegra-devfreq.c 
> b/drivers/devfreq/tegra30-devfreq.c
> similarity index 100%
> rename from drivers/devfreq/tegra-devfreq.c
> rename to drivers/devfreq/tegra30-devfreq.c
> 

Reviewed-by: Chanwoo Choi 

-- 
Best Regards,
Chanwoo Choi
Samsung Electronics


Re: [PATCH v3 13/16] PM / devfreq: tegra: Support Tegra30

2019-04-17 Thread Chanwoo Choi
On 19. 4. 18. 오전 7:29, Dmitry Osipenko wrote:
> The devfreq driver can be used on Tegra30 without any code change and
> it works perfectly fine, the default Tegra124 parameters are good enough
> for Tegra30.
> 
> Signed-off-by: Dmitry Osipenko 
> ---
>  drivers/devfreq/Kconfig | 4 ++--
>  drivers/devfreq/tegra-devfreq.c | 1 +
>  2 files changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
> index a78dffe603c1..56db9dc05edb 100644
> --- a/drivers/devfreq/Kconfig
> +++ b/drivers/devfreq/Kconfig
> @@ -92,8 +92,8 @@ config ARM_EXYNOS_BUS_DEVFREQ
> This does not yet operate with optimal voltages.
>  
>  config ARM_TEGRA_DEVFREQ
> - tristate "Tegra DEVFREQ Driver"
> - depends on ARCH_TEGRA_124_SOC
> + tristate "NVIDIA Tegra30/114/124/210 DEVFREQ Driver"
> + depends on ARCH_TEGRA
>   select PM_OPP
>   help
> This adds the DEVFREQ driver for the Tegra family of SoCs.
> diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c
> index e9ab49394d35..029afc5fb6a9 100644
> --- a/drivers/devfreq/tegra-devfreq.c
> +++ b/drivers/devfreq/tegra-devfreq.c
> @@ -738,6 +738,7 @@ static int tegra_devfreq_remove(struct platform_device 
> *pdev)
>  }
>  
>  static const struct of_device_id tegra_devfreq_of_match[] = {
> + { .compatible = "nvidia,tegra30-actmon" },
>   { .compatible = "nvidia,tegra124-actmon" },
>   { },
>  };
> 

Reviewed-by: Chanwoo Choi 

-- 
Best Regards,
Chanwoo Choi
Samsung Electronics


Re: kernel BUG at kernel/cred.c:434!

2019-04-17 Thread John Johansen
On 4/17/19 4:39 PM, Paul Moore wrote:
> On Wed, Apr 17, 2019 at 12:27 PM Oleg Nesterov  wrote:
>> On 04/17, Paul Moore wrote:
>>>
>>> On Wed, Apr 17, 2019 at 10:57 AM Oleg Nesterov  wrote:
 On 04/17, Paul Moore wrote:
>
> I'm tempted to simply return an error in selinux_setprocattr() if
> the task's credentials are not the same as its real_cred;

 What about other modules? I have no idea what smack_setprocattr() is,
 but it too does prepare_creds/commit creds.

 it seems that the simplest workaround should simply add the additional
 cred == real_cred into proc_pid_attr_write().
>>>
>>> Yes, that is simple, but I worry about what other LSMs might want to
>>> do.  While I believe failing if the effective creds are not the same
>>> as the real_creds is okay for SELinux (possibly Smack too), I worry
>>> about what other LSMs may want to do.  After all,
>>> proc_pid_attr_write() doesn't change the the creds itself, that is
>>> something the specific LSMs do.
>>
>> Yes, but if proc_pid_attr_write() is called with cred != real_cred then
>> something is already wrong?
> 
> True, or at least I would think so.
> 
> Looking at the current tree there are three LSMs which implement
> setprocattr hooks: SELinux, Smack, and AppArmor.  I know Casey has
> already mentioned that he wasn't able to trigger the problem in Smack,
> but looking at smack_setprocattr() I see the similar commit_creds()
> usage so I would expect the same problem in Smack; what say you Casey?
>  Looking at apparmor_setprocattr(), it appears that it too could end
> up calling commit_creds() via aa_set_current_hat().
> 
> Since it looks like all three LSMs which implement the setprocattr
> hook are vulnerable I'm open to the idea that proc_pid_attr_write() is
> a better choice for the cred != read_cred check, but I would want to
> make sure John and Casey are okay with that.
> 
> John?

heh yeah,

seems I messed up our check, we actually have

if (current_cred() != current_real_cred())
return -EBUSY;

on the change_profile path and missed it on the change_hat
one.

It makes sense to lift the check up earlier



Re: [PATCH v8 12/16] sched/core: uclamp: Extend CPU's cgroup controller

2019-04-17 Thread Suren Baghdasaryan
On Tue, Apr 2, 2019 at 3:43 AM Patrick Bellasi  wrote:
>
> The cgroup CPU bandwidth controller allows to assign a specified
> (maximum) bandwidth to the tasks of a group. However this bandwidth is
> defined and enforced only on a temporal base, without considering the
> actual frequency a CPU is running on. Thus, the amount of computation
> completed by a task within an allocated bandwidth can be very different
> depending on the actual frequency the CPU is running that task.
> The amount of computation can be affected also by the specific CPU a
> task is running on, especially when running on asymmetric capacity
> systems like Arm's big.LITTLE.
>
> With the availability of schedutil, the scheduler is now able
> to drive frequency selections based on actual task utilization.
> Moreover, the utilization clamping support provides a mechanism to
> bias the frequency selection operated by schedutil depending on
> constraints assigned to the tasks currently RUNNABLE on a CPU.
>
> Giving the mechanisms described above, it is now possible to extend the
> cpu controller to specify the minimum (or maximum) utilization which
> should be considered for tasks RUNNABLE on a cpu.
> This makes it possible to better defined the actual computational
> power assigned to task groups, thus improving the cgroup CPU bandwidth
> controller which is currently based just on time constraints.
>
> Extend the CPU controller with a couple of new attributes util.{min,max}
> which allows to enforce utilization boosting and capping for all the
> tasks in a group. Specifically:
>
> - util.min: defines the minimum utilization which should be considered
> i.e. the RUNNABLE tasks of this group will run at least at a
>  minimum frequency which corresponds to the util.min
>  utilization
>
> - util.max: defines the maximum utilization which should be considered
> i.e. the RUNNABLE tasks of this group will run up to a
>  maximum frequency which corresponds to the util.max
>  utilization
>
> These attributes:
>
> a) are available only for non-root nodes, both on default and legacy
>hierarchies, while system wide clamps are defined by a generic
>interface which does not depends on cgroups. This system wide
>interface enforces constraints on tasks in the root node.
>
> b) enforce effective constraints at each level of the hierarchy which
>are a restriction of the group requests considering its parent's
>effective constraints. Root group effective constraints are defined
>by the system wide interface.
>This mechanism allows each (non-root) level of the hierarchy to:
>- request whatever clamp values it would like to get
>- effectively get only up to the maximum amount allowed by its parent
>
> c) have higher priority than task-specific clamps, defined via
>sched_setattr(), thus allowing to control and restrict task requests
>
> Add two new attributes to the cpu controller to collect "requested"
> clamp values. Allow that at each non-root level of the hierarchy.
> Validate local consistency by enforcing util.min < util.max.
> Keep it simple by do not caring now about "effective" values computation
> and propagation along the hierarchy.
>
> Signed-off-by: Patrick Bellasi 
> Cc: Ingo Molnar 
> Cc: Peter Zijlstra 
> Cc: Tejun Heo 
>
> --
> Changes in v8:
>  Message-ID: <20190214154817.gn50...@devbig004.ftw2.facebook.com>
>  - update changelog description for points b), c) and following paragraph
> ---
>  Documentation/admin-guide/cgroup-v2.rst |  27 +
>  init/Kconfig|  22 
>  kernel/sched/core.c | 142 +++-
>  kernel/sched/sched.h|   6 +
>  4 files changed, 196 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/admin-guide/cgroup-v2.rst 
> b/Documentation/admin-guide/cgroup-v2.rst
> index 7bf3f129c68b..47710a77f4fa 100644
> --- a/Documentation/admin-guide/cgroup-v2.rst
> +++ b/Documentation/admin-guide/cgroup-v2.rst
> @@ -909,6 +909,12 @@ controller implements weight and absolute bandwidth 
> limit models for
>  normal scheduling policy and absolute bandwidth allocation model for
>  realtime scheduling policy.
>
> +Cycles distribution is based, by default, on a temporal base and it
> +does not account for the frequency at which tasks are executed.
> +The (optional) utilization clamping support allows to enforce a minimum
> +bandwidth, which should always be provided by a CPU, and a maximum bandwidth,
> +which should never be exceeded by a CPU.
> +
>  WARNING: cgroup2 doesn't yet support control of realtime processes and
>  the cpu controller can only be enabled when all RT processes are in
>  the root cgroup.  Be aware that system management software may already
> @@ -974,6 +980,27 @@ All time durations are in microseconds.
> Shows pressure stall information for CPU. See
> Documentation/accounting/psi.txt for 

Re: [PATCH V2 0/3] Introduce Thermal Pressure

2019-04-17 Thread Thara Gopinath
On 04/17/2019 02:29 PM, Ingo Molnar wrote:
> 
> * Thara Gopinath  wrote:
> 
>>
>> On 04/17/2019 01:36 AM, Ingo Molnar wrote:
>>>
>>> * Thara Gopinath  wrote:
>>>
 The test results below shows 3-5% improvement in performance when
 using the third solution compared to the default system today where
 scheduler is unware of cpu capacity limitations due to thermal events.
>>>
>>> The numbers look very promising!
>>
>> Hello Ingo,
>> Thank you for the review.
>>>
>>> I've rearranged the results to make the performance properties of the 
>>> various approaches and parameters easier to see:
>>>
>>>  (seconds, lower is better)
>>>
>>>  Hackbench   Aobench   Dhrystone
>>>  =   ===   =
>>> Vanilla kernel (No Thermal Pressure) 10.21141.581.14
>>> Instantaneous thermal pressure   10.16141.631.15
>>> Thermal Pressure Averaging:
>>>   - PELT fmwk 9.88134.481.19
>>>   - non-PELT Algo. Decay : 500 ms 9.94133.621.09
>>>   - non-PELT Algo. Decay : 250 ms 7.52137.221.012
>>>   - non-PELT Algo. Decay : 125 ms 9.87137.551.12
>>>
>>>
>>> Firstly, a couple of questions about the numbers:
>>>
>>>1)
>>>
>>>   Is the 1.012 result for "non-PELT 250 msecs Dhrystone" really 1.012?
>>>   You reported it as:
>>>
>>>  non-PELT Algo. Decay : 250 ms   1.012   7.02%
>>
>> It is indeed 1.012. So, I ran the "non-PELT Algo 250 ms" benchmarks
>> multiple time because of the anomalies noticed.  1.012 is a formatting
>> error on my part when I copy pasted the results into a google sheet I am
>> maintaining to capture the test results. Sorry about the confusion.
> 
> That's actually pretty good, because it suggests a 35% and 15% 
> improvement over the vanilla kernel - which is very good for such 
> CPU-bound workloads.
> 
> Not that 5% is bad in itself - but 15% is better ;-)
> 
>> Regarding the decay period, I agree that more testing can be done. I 
>> like your suggestions below and I am going to try implementing them 
>> sometime next week. Once I have some solid results, I will send them 
>> out.
> 
> Thanks!
> 
>> My concern regarding getting hung up too much on decay period is that I 
>> think it could vary from SoC to SoC depending on the type and number of 
>> cores and thermal characteristics. So I was thinking eventually the 
>> decay period should be configurable via a config option or by any other 
>> means. Testing on different systems will definitely help and maybe I am 
>> wrong and there is no much variation between systems.
> 
> Absolutely, so I'd not be against keeping it a SCHED_DEBUG tunable or so, 
> until there's a better understanding of how the physical properties of 
> the SoC map to an ideal decay period.
> 
> Assuming PeterZ & Rafael & Quentin doesn't hate the whole thermal load 
> tracking approach. I suppose there's some connection of this to Energy 
> Aware Scheduling? Or not ...
Mmm.. Not so much. This does not have much to do with EAS. The feature
itself will be really useful if there are asymmetric cpus in the  system
rather than symmetric cpus. In case of SMP, since all cores have same
capacity and assuming any thermal mitigation will be implemented across
the all the cpus, there won't be any different scheduler behavior.

Regards
Thara
> 
> Thanks,
> 
>   Ingo
> 


-- 
Regards
Thara


Re: [PATCH v2] panic: add an option to replay all the printk message in buffer

2019-04-17 Thread Sergey Senozhatsky
On (04/17/19 23:18), Feng Tang wrote:
> > > +++ b/kernel/printk/printk.c
> > > @@ -2549,6 +2549,14 @@ void console_flush_on_panic(void)
> > >*/
> > >   console_trylock();
> > >   console_may_schedule = 0;
> > > + if (flush_mode == CONSOLE_FLUSH_ALL) {
> > > + /*
> > > +  * Can be done under logbuf lock, but it's unlikely that
> > > +  * we will have any race conditions here.
> > > +  */
> > > + console_seq = log_first_seq;
> > > + console_idx = log_first_idx;
> 
> This is very similar to my V1 patch :), excepted I used a bool
> as the parameter.

Yes it is :)

I will reply to Petr's and Feng's email.

> > I agree that it is easier. The cost is that the same messages are
> > printed again without any explanation.
> > 
> > I still think that it would be convenient to write a header line.
> > It would help to understand the log for any, even 3rd-party, reader.
> > Also it would help to find the beginning in a very long log.
>
> My thought is, the replay is only a debug option and disabled by default,
> so when user specifically enable the bit of PANIC_PRINT_ALL_PRINTK_MSG,
> the whole replay of printk msg should be expected.

I think that PANIC_PRINT_ALL_PRINTK_MSG is a debugging option; a quite
specific one. So people who ask the kernel to PANIC_PRINT_ALL_PRINTK_MSG
they know what they are doing, and we probably will not cofuse anyone.
After all, we don't print any headers when we ftrace_dump() or imitate
sysrq via sysrq_timer_list_show(), or for any other panic_print_sys_info()
printouts. So it's OK to just do the simple thing for
PANIC_PRINT_ALL_PRINTK_MSG.

-ss


[PATCH v2] ktest: Add support for meta characters in GRUB_MENU

2019-04-17 Thread Masayoshi Mizuma
From: Masayoshi Mizuma 

ktest fails if meta characters are in GRUB_MENU, for example
GRUB_MENU = 'Fedora (test)'

The failure happens because the meta characters are not escaped,
so the menu doesn't match in any entries in GRUB_FILE.

Use quotemeta() to escape the meta characters.

Signed-off-by: Masayoshi Mizuma 
---
 tools/testing/ktest/ktest.pl | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 87af8a68ab25..fc6140d45280 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1866,9 +1866,10 @@ sub get_grub2_index {
or dodie "unable to get $grub_file";
 
 my $found = 0;
+my $grub_menu_qt = quotemeta($grub_menu);
 
 while () {
-   if (/^menuentry.*$grub_menu/) {
+   if (/^menuentry.*$grub_menu_qt/) {
$grub_number++;
$found = 1;
last;
@@ -1909,9 +1910,10 @@ sub get_grub_index {
or dodie "unable to get menu.lst";
 
 my $found = 0;
+my $grub_menu_qt = quotemeta($grub_menu);
 
 while () {
-   if (/^\s*title\s+$grub_menu\s*$/) {
+   if (/^\s*title\s+$grub_menu_qt\s*$/) {
$grub_number++;
$found = 1;
last;
-- 
2.20.1



Re: [PATCH] clk: ingenic/jz4725b: Fix parent of pixel clock

2019-04-17 Thread Paul Cercueil

Hi Stephen,

Le jeu. 18 avril 2019 à 1:48, Stephen Boyd  a écrit 
:

Quoting Paul Cercueil (2019-04-17 04:24:20)
 The pixel clock is directly connected to the output of the PLL, and 
not

 to the /2 divider.

 Cc: sta...@vger.kernel.org
 Fixes: 226dfa4726eb ("clk: Add Ingenic jz4725b CGU driver")
 Signed-off-by: Paul Cercueil 
 ---


Is this breaking something in 5.1-rc series? Or just found by
inspection? I'm trying to understand the priority of this patch.


I verified it with the hardware. It fixes a bug that has been present
since the introduction of this driver.

However until now nothing uses this particular clock so it can go to 
5.2.


-Paul




RE: [PATCH] smp: Do not warn if smp_call_function_single() is doing a self call.

2019-04-17 Thread Dexuan Cui
> From: Thomas Gleixner 
> Sent: Tuesday, April 16, 2019 1:13 PM
> > ...
> > True. And before we start digging deeper into this, let's step back: why
> > do we need to do clockevents_unbind_device() on hybernation? Can we just
> > disable the device and re-enable it back on resume?

We do clockevents_unbind_device as part of hv_synic_cleanup(), which is
called as a CPU hotplug callback: see vmbus_bus_init():
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
hv_synic_init, hv_synic_cleanup);

Yes, it looks the right thing is to implement the suspend/resume callbacks of
the clock_event_device. Thank you for the suggestion! I'll look into this.

> Yes. That's the right thing to do. Simple solution is to implement the
> suspend/resume callbacks on the clock events device and be done with it.

Agreed.

> > Actually, all usages of clockevents_unbind_device() in kernel are
> > limited to Hyper-V and with Michael's patches moving this out of VMBus
> > driver I think it can go away completely.

Thanks for the heads-up! I'll rebase to Michael's patches.
 
> Correct. There was a driver which required that, but that's gone by now and
> of course nobody noticed that it was the last user. The reason why this
> exists was to allow switching out an active clocksource similar to the
> sysfs unbind file but without user space interaction.
> 
>   tglx

Thanks for the background sharing!

- Dexuan


Re: [PATCH] clk: ingenic/jz4725b: Fix parent of pixel clock

2019-04-17 Thread Stephen Boyd
Quoting Paul Cercueil (2019-04-17 04:24:20)
> The pixel clock is directly connected to the output of the PLL, and not
> to the /2 divider.
> 
> Cc: sta...@vger.kernel.org
> Fixes: 226dfa4726eb ("clk: Add Ingenic jz4725b CGU driver")
> Signed-off-by: Paul Cercueil 
> ---

Is this breaking something in 5.1-rc series? Or just found by
inspection? I'm trying to understand the priority of this patch.



Re: [PATCH] i2c: iproc: Change driver to use 'BIT' macro

2019-04-17 Thread Ray Jui



On 4/14/2019 11:56 PM, Peter Rosin wrote:
> On 2019-04-13 00:59, Peter Rosin wrote:
>> On 2019-04-03 23:05, Ray Jui wrote:
>>> Change the iProc I2C driver to use the 'BIT' macro from all '1 << XXX'
>>> bit operations to get rid of compiler warning and improve readability of
>>> the code
>>
>> All? I see lots more '1 << XXX_SHIFT' matches. I might be behind though?
> 
> I verified that, and yes indeed, I was behind. That said, see below...
> 

Right. Previous change that this change depends on is already queued in
i2c/for-next.

>> Anyway, if you are cleaning up, I'm just flagging that BIT(XXX_SHIFT) looks
>> a bit clunky to me. You might consider renaming all those single-bit
>> XXX_SHIFT macros to simple be
>>
>> #define XXX BIT()
>>
>> instead of
>>
>> #define XXX_SHIFT 
>>
>> but that triggers more churn, so is obviously more error prone. You might
>> not dare it?
>>

With the current code, I don't see how that is cleaner. With XXX_SHIFT
specified, it makes it very clear to the user that the define a for a
bit location within a register. You can argue and say it makes the
define longer, but not less clear.

>> Cheers,
>> Peter
>>
>>> Signed-off-by: Ray Jui 
>>> ---
>>>  drivers/i2c/busses/i2c-bcm-iproc.c | 6 +++---
>>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c 
>>> b/drivers/i2c/busses/i2c-bcm-iproc.c
>>> index 562942d0c05c..a845b8decac8 100644
>>> --- a/drivers/i2c/busses/i2c-bcm-iproc.c
>>> +++ b/drivers/i2c/busses/i2c-bcm-iproc.c
>>> @@ -717,7 +717,7 @@ static int bcm_iproc_i2c_xfer_single_msg(struct 
>>> bcm_iproc_i2c_dev *iproc_i2c,
>>>  
>>> /* mark the last byte */
>>> if (i == msg->len - 1)
>>> -   val |= 1 << M_TX_WR_STATUS_SHIFT;
>>> +   val |= BIT(M_TX_WR_STATUS_SHIFT);
>>>  
>>> iproc_i2c_wr_reg(iproc_i2c, M_TX_OFFSET, val);
>>> }
>>> @@ -844,7 +844,7 @@ static int bcm_iproc_i2c_cfg_speed(struct 
>>> bcm_iproc_i2c_dev *iproc_i2c)
>>>  
>>> iproc_i2c->bus_speed = bus_speed;
>>> val = iproc_i2c_rd_reg(iproc_i2c, TIM_CFG_OFFSET);
>>> -   val &= ~(1 << TIM_CFG_MODE_400_SHIFT);
>>> +   val &= ~BIT(TIM_CFG_MODE_400_SHIFT);
>>> val |= (bus_speed == 40) << TIM_CFG_MODE_400_SHIFT;
> 
> These two statements now no longer "match". One uses BIT and the other open
> codes the shift. I think that's bad. Losing the _SHIFT suffix and including
> BIT in the macro expansion, as suggested above, yields:
> 
>   val &= ~TIM_CFG_MODE_400;
>   if (bus_speed == 40)
>   val |= TIM_CFG_MODE_400;
> 
> which is perhaps one more line, but also more readable IMO.
> 

A single line with evaluation embedded is nice and clean to me. I guess
this is subjective.

I'll leave the decision to Wolfram. If he also prefers the above change
to be made, sure. Otherwise, I'll leave it as it is.

> But all this is of course in deep nit-pick-territory...
> 
> Cheers,
> Peter
> 

Thanks,

Ray


Re: [PATCH 4/5] mm: rework non-root kmem_cache lifecycle management

2019-04-17 Thread Shakeel Butt
On Wed, Apr 17, 2019 at 2:55 PM Roman Gushchin  wrote:
>
> This commit makes several important changes in the lifecycle
> of a non-root kmem_cache, which also affect the lifecycle
> of a memory cgroup.
>
> Currently each charged slab page has a page->mem_cgroup pointer
> to the memory cgroup and holds a reference to it.
> Kmem_caches are held by the cgroup. On offlining empty kmem_caches
> are freed, all other are freed on cgroup release.

No, they are not freed (i.e. destroyed) on offlining, only
deactivated. All memcg kmem_caches are freed/destroyed on memcg's
css_free.

>
> So the current scheme can be illustrated as:
> page->mem_cgroup->kmem_cache.
>
> To implement the slab memory reparenting we need to invert the scheme
> into: page->kmem_cache->mem_cgroup.
>
> Let's make every page to hold a reference to the kmem_cache (we
> already have a stable pointer), and make kmem_caches to hold a single
> reference to the memory cgroup.

What about memcg_kmem_get_cache()? That function assumes that by
taking reference on memcg, it's kmem_caches will stay. I think you
need to get reference on the kmem_cache in memcg_kmem_get_cache()
within the rcu lock where you get the memcg through css_tryget_online.

>
> To make this possible we need to introduce a new refcounter
> for non-root kmem_caches. It's atomic for now, but can be easily
> converted to a percpu counter, had we any performance penalty*.
> The initial value is set to 1, and it's decremented on deactivation,
> so we never shutdown an active cache.
>
> To shutdown non-active empty kmem_caches, let's reuse the
> infrastructure of the RCU-delayed work queue, used previously for
> the deactivation. After the generalization, it's perfectly suited
> for our needs.
>
> Since now we can release a kmem_cache at any moment after the
> deactivation, let's call sysfs_slab_remove() only from the shutdown
> path. It makes deactivation path simpler.
>
> Because we don't set the page->mem_cgroup pointer, we need to change
> the way how memcg-level stats is working for slab pages. We can't use
> mod_lruvec_page_state() helpers anymore, so switch over to
> mod_lruvec_state().
>
> * I used the following simple approach to test the performance
> (stolen from another patchset by T. Harding):
>
> time find / -name fname-no-exist
> echo 2 > /proc/sys/vm/drop_caches
> repeat several times
>
> Results (I've chosen best results in several runs):
>
> orig   patched
>
> real0m0.712s   0m0.690s
> user0m0.104s   0m0.101s
> sys 0m0.346s   0m0.340s
>
> real0m0.728s   0m0.723s
> user0m0.114s   0m0.115s
> sys 0m0.342s   0m0.338s
>
> real0m0.685s   0m0.767s
> user0m0.118s   0m0.114s
> sys 0m0.343s   0m0.336s
>
> So it looks like the difference is not noticeable in this test.
>
> Signed-off-by: Roman Gushchin 
> ---
>  include/linux/slab.h |  2 +-
>  mm/memcontrol.c  |  9 
>  mm/slab.c| 15 +---
>  mm/slab.h| 54 +---
>  mm/slab_common.c | 51 +
>  mm/slub.c| 22 +-
>  6 files changed, 79 insertions(+), 74 deletions(-)
>
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 47923c173f30..4daaade76c63 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -152,7 +152,6 @@ int kmem_cache_shrink(struct kmem_cache *);
>
>  void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *);
>  void memcg_deactivate_kmem_caches(struct mem_cgroup *);
> -void memcg_destroy_kmem_caches(struct mem_cgroup *);
>
>  /*
>   * Please use this macro to create slab caches. Simply specify the
> @@ -641,6 +640,7 @@ struct memcg_cache_params {
> struct mem_cgroup *memcg;
> struct list_head children_node;
> struct list_head kmem_caches_node;
> +   atomic_long_t refcnt;
>
> void (*work_fn)(struct kmem_cache *);
> union {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b2c39f187cbb..87c06e342e05 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2719,9 +2719,6 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t 
> gfp, int order,
> cancel_charge(memcg, nr_pages);
> return -ENOMEM;
> }
> -
> -   page->mem_cgroup = memcg;
> -
> return 0;
>  }
>
> @@ -2744,8 +2741,10 @@ int __memcg_kmem_charge(struct page *page, gfp_t gfp, 
> int order)
> memcg = get_mem_cgroup_from_current();
> if (!mem_cgroup_is_root(memcg)) {
> ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg);
> -   if (!ret)
> +   if (!ret) {
> +   page->mem_cgroup = memcg;
> __SetPageKmemcg(page);
> +   }
> }
> css_put(>css);
> return ret;
> @@ -3238,7 +3237,7 @@ 

[PATCH 2/2] drm/panel: add panel CDTech S050WV43-CT5 to panel-simple

2019-04-17 Thread Florent TOMASIN
This patch adds support for CDTech S050WV43-CT5 800x480 5" panel to DRM
simple panel driver.

Signed-off-by: Florent TOMASIN 
---
 drivers/gpu/drm/panel/panel-simple.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-simple.c 
b/drivers/gpu/drm/panel/panel-simple.c
index 9e8218f6a3f2..486f61d324fb 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -917,6 +917,30 @@ static const struct panel_desc cdtech_s043wq26h_ct7 = {
.bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE,
 };
 
+static const struct drm_display_mode cdtech_s050wv43_ct5_mode = {
+   .clock = 3,
+   .hdisplay = 800,
+   .hsync_start = 800 + 40,
+   .hsync_end = 800 + 40 + 48,
+   .htotal = 800 + 40 + 48 + 88,
+   .vdisplay = 480,
+   .vsync_start = 480 + 13,
+   .vsync_end = 480 + 13 + 3,
+   .vtotal = 480 + 13 + 3 + 32,
+   .vrefresh = 60,
+   .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+};
+
+static const struct panel_desc cdtech_s050wv43_ct5 = {
+   .modes = _s050wv43_ct5_mode,
+   .num_modes = 1,
+   .bpc = 8,
+   .size = {
+   .width = 108,
+   .height = 64,
+   },
+};
+
 static const struct drm_display_mode cdtech_s070wv95_ct16_mode = {
.clock = 35000,
.hdisplay = 800,
@@ -2598,6 +2622,9 @@ static const struct of_device_id platform_of_match[] = {
}, {
.compatible = "cdtech,s043wq26h-ct7",
.data = _s043wq26h_ct7,
+   }, {
+   .compatible = "cdtech,s050wv43-ct5",
+   .data = _s050wv43_ct5,
}, {
.compatible = "cdtech,s070wv95-ct16",
.data = _s070wv95_ct16,
-- 
2.17.1



Re: kernel BUG at kernel/cred.c:434!

2019-04-17 Thread Paul Moore
On Wed, Apr 17, 2019 at 12:27 PM Oleg Nesterov  wrote:
> On 04/17, Paul Moore wrote:
> >
> > On Wed, Apr 17, 2019 at 10:57 AM Oleg Nesterov  wrote:
> > > On 04/17, Paul Moore wrote:
> > > >
> > > > I'm tempted to simply return an error in selinux_setprocattr() if
> > > > the task's credentials are not the same as its real_cred;
> > >
> > > What about other modules? I have no idea what smack_setprocattr() is,
> > > but it too does prepare_creds/commit creds.
> > >
> > > it seems that the simplest workaround should simply add the additional
> > > cred == real_cred into proc_pid_attr_write().
> >
> > Yes, that is simple, but I worry about what other LSMs might want to
> > do.  While I believe failing if the effective creds are not the same
> > as the real_creds is okay for SELinux (possibly Smack too), I worry
> > about what other LSMs may want to do.  After all,
> > proc_pid_attr_write() doesn't change the the creds itself, that is
> > something the specific LSMs do.
>
> Yes, but if proc_pid_attr_write() is called with cred != real_cred then
> something is already wrong?

True, or at least I would think so.

Looking at the current tree there are three LSMs which implement
setprocattr hooks: SELinux, Smack, and AppArmor.  I know Casey has
already mentioned that he wasn't able to trigger the problem in Smack,
but looking at smack_setprocattr() I see the similar commit_creds()
usage so I would expect the same problem in Smack; what say you Casey?
 Looking at apparmor_setprocattr(), it appears that it too could end
up calling commit_creds() via aa_set_current_hat().

Since it looks like all three LSMs which implement the setprocattr
hook are vulnerable I'm open to the idea that proc_pid_attr_write() is
a better choice for the cred != read_cred check, but I would want to
make sure John and Casey are okay with that.

John?

Casey?

-- 
paul moore
www.paul-moore.com


Re: [PATCH v5] gcov: Clang support

2019-04-17 Thread Tri Vo
On Wed, Apr 17, 2019 at 4:21 PM Andrew Morton  wrote:
>
> On Wed, 17 Apr 2019 15:53:28 -0700 Tri Vo  wrote:
>
> > LLVM uses profiling data that's deliberately similar to GCC, but has a very
> > different way of exporting that data.  LLVM calls llvm_gcov_init() once per
> > module, and provides a couple of callbacks that we can use to ask for more
> > data.
> >
> > We care about the "writeout" callback, which in turn calls back into
> > compiler-rt/this module to dump all the gathered coverage data to disk:
> >
> >llvm_gcda_start_file()
> >  llvm_gcda_emit_function()
> >  llvm_gcda_emit_arcs()
> >  llvm_gcda_emit_function()
> >  llvm_gcda_emit_arcs()
> >  [... repeats for each function ...]
> >llvm_gcda_summary_info()
> >llvm_gcda_end_file()
> >
> > This design is much more stateless and unstructured than gcc's, and is
> > intended to run at process exit.  This forces us to keep some local state
> > about which module we're dealing with at the moment.  On the other hand, it
> > also means we don't depend as much on how LLVM represents profiling data
> > internally.
> >
> > See LLVM's lib/Transforms/Instrumentation/GCOVProfiling.cpp for more
> > details on how this works, particularly GCOVProfiler::emitProfileArcs(),
> > GCOVProfiler::insertCounterWriteout(), and GCOVProfiler::insertFlush().
>
> checkpatch speaketh truth.
>
> Also, I'll change those Co-authored-by's to the documented Co-developed-by.
>
>
> From: Andrew Morton 
> Subject: gcov-clang-support-checkpatch-fixes
>
> WARNING: Non-standard signature: Co-authored-by:
> #31:
> Co-authored-by: Nick Desaulniers 
>
> WARNING: Non-standard signature: Co-authored-by:
> #32:
> Co-authored-by: Tri Vo 
>
> WARNING: Possible unnecessary 'out of memory' message
> #158: FILE: kernel/gcov/clang.c:90:
> +   if (!info) {
> +   pr_warn_ratelimited("failed to allocate gcov info\n");
>
> WARNING: Possible unnecessary 'out of memory' message
> #193: FILE: kernel/gcov/clang.c:125:
> +   if (!info) {
> +   pr_warn_ratelimited("failed to allocate gcov function info 
> for %s\n",
>
> WARNING: line over 80 characters
> #546: FILE: kernel/gcov/clang.c:478:
> +   pos += store_gcov_u32(buffer, pos, 
> fi_ptr->cfg_checksum);
>
> total: 0 errors, 5 warnings, 663 lines checked
>
> NOTE: For some of the reported defects, checkpatch may be able to
>   mechanically convert to the typical style using --fix or --fix-inplace.
>
> ./patches/gcov-clang-support.patch has style problems, please review.
>
> NOTE: If any of the errors are false positives, please report
>   them to the maintainer, see CHECKPATCH in MAINTAINERS.
>
> Please run checkpatch prior to sending patches

Thanks! I'll keep that in mind.


[PATCH 1/2] dt-bindings: Add CDTech S050WV43-CT5 panel bindings

2019-04-17 Thread Florent TOMASIN
Add documentation for S050WV43-CT5 panel

Signed-off-by: Florent TOMASIN 
---
 .../bindings/display/panel/cdtech,s050wv43-ct5.txt   | 12 
 1 file changed, 12 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/display/panel/cdtech,s050wv43-ct5.txt

diff --git 
a/Documentation/devicetree/bindings/display/panel/cdtech,s050wv43-ct5.txt 
b/Documentation/devicetree/bindings/display/panel/cdtech,s050wv43-ct5.txt
new file mode 100644
index ..0e3313b68d9e
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/cdtech,s050wv43-ct5.txt
@@ -0,0 +1,12 @@
+CDTech(H.K.) Electronics Limited 5" 800x480 color TFT-LCD panel
+
+Required properties:
+- compatible: should be "cdtech,s050wv43-ct5"
+- power-supply: as specified in the base binding
+
+Optional properties:
+- backlight: as specified in the base binding
+- enable-gpios: as specified in the base binding
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
-- 
2.17.1



Re: [PATCH v5] gcov: Clang support

2019-04-17 Thread Andrew Morton
On Wed, 17 Apr 2019 15:53:28 -0700 Tri Vo  wrote:

> LLVM uses profiling data that's deliberately similar to GCC, but has a very
> different way of exporting that data.  LLVM calls llvm_gcov_init() once per
> module, and provides a couple of callbacks that we can use to ask for more
> data.
> 
> We care about the "writeout" callback, which in turn calls back into
> compiler-rt/this module to dump all the gathered coverage data to disk:
> 
>llvm_gcda_start_file()
>  llvm_gcda_emit_function()
>  llvm_gcda_emit_arcs()
>  llvm_gcda_emit_function()
>  llvm_gcda_emit_arcs()
>  [... repeats for each function ...]
>llvm_gcda_summary_info()
>llvm_gcda_end_file()
> 
> This design is much more stateless and unstructured than gcc's, and is
> intended to run at process exit.  This forces us to keep some local state
> about which module we're dealing with at the moment.  On the other hand, it
> also means we don't depend as much on how LLVM represents profiling data
> internally.
> 
> See LLVM's lib/Transforms/Instrumentation/GCOVProfiling.cpp for more
> details on how this works, particularly GCOVProfiler::emitProfileArcs(),
> GCOVProfiler::insertCounterWriteout(), and GCOVProfiler::insertFlush().

checkpatch speaketh truth.

Also, I'll change those Co-authored-by's to the documented Co-developed-by.


From: Andrew Morton 
Subject: gcov-clang-support-checkpatch-fixes

WARNING: Non-standard signature: Co-authored-by:
#31: 
Co-authored-by: Nick Desaulniers 

WARNING: Non-standard signature: Co-authored-by:
#32: 
Co-authored-by: Tri Vo 

WARNING: Possible unnecessary 'out of memory' message
#158: FILE: kernel/gcov/clang.c:90:
+   if (!info) {
+   pr_warn_ratelimited("failed to allocate gcov info\n");

WARNING: Possible unnecessary 'out of memory' message
#193: FILE: kernel/gcov/clang.c:125:
+   if (!info) {
+   pr_warn_ratelimited("failed to allocate gcov function info for 
%s\n",

WARNING: line over 80 characters
#546: FILE: kernel/gcov/clang.c:478:
+   pos += store_gcov_u32(buffer, pos, 
fi_ptr->cfg_checksum);

total: 0 errors, 5 warnings, 663 lines checked

NOTE: For some of the reported defects, checkpatch may be able to
  mechanically convert to the typical style using --fix or --fix-inplace.

./patches/gcov-clang-support.patch has style problems, please review.

NOTE: If any of the errors are false positives, please report
  them to the maintainer, see CHECKPATCH in MAINTAINERS.

Please run checkpatch prior to sending patches

Cc: Daniel Mentz 
Cc: Greg Hackmann 
Cc: Nick Desaulniers 
Cc: Peter Oberparleiter 
Cc: Petri Gynther 
Cc: Prasad Sodagudi 
Cc: Trilok Soni 
Cc: Tri Vo 
Signed-off-by: Andrew Morton 
---

 kernel/gcov/clang.c |9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

--- a/kernel/gcov/clang.c~gcov-clang-support-checkpatch-fixes
+++ a/kernel/gcov/clang.c
@@ -86,10 +86,8 @@ void llvm_gcov_init(llvm_gcov_callback w
 {
struct gcov_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
 
-   if (!info) {
-   pr_warn_ratelimited("failed to allocate gcov info\n");
+   if (!info)
return;
-   }
 
INIT_LIST_HEAD(>head);
INIT_LIST_HEAD(>functions);
@@ -121,11 +119,8 @@ void llvm_gcda_emit_function(u32 ident,
 {
struct gcov_fn_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
 
-   if (!info) {
-   pr_warn_ratelimited("failed to allocate gcov function info for 
%s\n",
-   function_name ?: "UNKNOWN");
+   if (!info)
return;
-   }
 
INIT_LIST_HEAD(>head);
info->ident = ident;
_



Re: [PATCH v2] mtd: rawnand: ams-delta: Drop board specific partition info

2019-04-17 Thread Janusz Krzysztofik
Hi Aaro, Tony,

On Wednesday, April 17, 2019 11:40:10 AM CEST Miquel Raynal wrote:
> Hi Janusz,
> 
> Janusz Krzysztofik  wrote on Sun, 24 Mar 2019
> 23:33:44 +0100:
> 
> > After recent modifications, only a hardcoded partition info makes
> > the driver device specific.  Other than that, the driver uses GPIO
> > exclusively and can be used on any hardware.
> > 
> > Drop the partition info and use MTD partition parser with default list
> > of parser names instead.  For the OF parser to work correctly, pass
> > device of_node to mtd.
> > 
> > Amstrad Delta users should append the following partition info to their
> > kernel command line, possibly by embedding it in CONFIG_CMDLINE:
> > 
> > mtdparts=ams-delta-nand:3584k(Kernel),256k(u-boot),256k(u-boot_params),\
> > 256k(Amstrad_LDR),27m(File_system),768k(PBL_reserved).
> > 
> > For their convenience, CONFIG_MTD_CMDLINE_PARTS symbol is selected
> > automatically from that board Kconfig if this NAND driver is also
> > selected.
> > 
> > Signed-off-by: Janusz Krzysztofik 
> > Cc: Tony Lindgren 
> > ---
> 
> FYI I am okay with the change but I am waiting for acks before applying
> it.

May we have an ack from you?

If still not convinced with my clarifications, I can add a comment to help text 
in Kconfig, either squashed or in a follow up patch, on the requirement of 
appending mtdparts parameter to command line.  What do you think?

Thanks,
Janusz

> 
> Thanks,
> Miquèl
> 






Re: [PATCH v8 08/16] sched/core: uclamp: Set default clamps for RT tasks

2019-04-17 Thread Suren Baghdasaryan
On Tue, Apr 2, 2019 at 3:42 AM Patrick Bellasi  wrote:
>
> By default FAIR tasks start without clamps, i.e. neither boosted nor
> capped, and they run at the best frequency matching their utilization
> demand.  This default behavior does not fit RT tasks which instead are
> expected to run at the maximum available frequency, if not otherwise
> required by explicitly capping them.
>
> Enforce the correct behavior for RT tasks by setting util_min to max
> whenever:
>
>  1. the task is switched to the RT class and it does not already have a
> user-defined clamp value assigned.
>
>  2. an RT task is forked from a parent with RESET_ON_FORK set.
>
> NOTE: utilization clamp values are cross scheduling class attributes and
> thus they are never changed/reset once a value has been explicitly
> defined from user-space.
>
> Signed-off-by: Patrick Bellasi 
> Cc: Ingo Molnar 
> Cc: Peter Zijlstra 
> ---
>  kernel/sched/core.c | 26 ++
>  1 file changed, 26 insertions(+)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index bdebdabe9bc4..71c9dd6487b1 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1042,6 +1042,28 @@ static int uclamp_validate(struct task_struct *p,
>  static void __setscheduler_uclamp(struct task_struct *p,
>   const struct sched_attr *attr)
>  {
> +   unsigned int clamp_id;
> +
> +   /*
> +* On scheduling class change, reset to default clamps for tasks
> +* without a task-specific value.
> +*/
> +   for (clamp_id = 0; clamp_id < UCLAMP_CNT; ++clamp_id) {
> +   struct uclamp_se *uc_se = >uclamp_req[clamp_id];
> +   unsigned int clamp_value = uclamp_none(clamp_id);
> +
> +   /* Keep using defined clamps across class changes */
> +   if (uc_se->user_defined)
> +   continue;
> +
> +   /* By default, RT tasks always get 100% boost */
> +   if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
> +   clamp_value = uclamp_none(UCLAMP_MAX);
> +
> +   uc_se->bucket_id = uclamp_bucket_id(clamp_value);
> +   uc_se->value = clamp_value;

Is it possible for p->uclamp_req[UCLAMP_MAX].value to be less than
uclamp_none(UCLAMP_MAX) for this RT task? If that's a possibility then
I think we will end up with a case of p->uclamp_req[UCLAMP_MIN].value
> p->uclamp_req[UCLAMP_MAX].value after these assignments are done.

> +   }
> +
> if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
> return;
>
> @@ -1077,6 +1099,10 @@ static void uclamp_fork(struct task_struct *p)
> for (clamp_id = 0; clamp_id < UCLAMP_CNT; ++clamp_id) {
> unsigned int clamp_value = uclamp_none(clamp_id);
>
> +   /* By default, RT tasks always get 100% boost */
> +   if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
> +   clamp_value = uclamp_none(UCLAMP_MAX);
> +
> p->uclamp_req[clamp_id].user_defined = false;
> p->uclamp_req[clamp_id].value = clamp_value;
> p->uclamp_req[clamp_id].bucket_id = 
> uclamp_bucket_id(clamp_value);
> --
> 2.20.1
>


Re: [PATCH v5 3/3] platform/x86: intel_pmc_core: Instantiate pmc_core device on legacy platforms

2019-04-17 Thread Rajat Jain
On Thu, Apr 11, 2019 at 6:46 AM Andy Shevchenko
 wrote:
>
> On Thu, Apr 11, 2019 at 3:38 AM Rajat Jain  wrote:
> >
> > Add code to instantiate the pmc_core platform device and thus attach to
> > the driver, if the ACPI device for the same ("INT33A1") is not present
> > in a system where it should be. This was discussed here:
> > https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1966991.html
> >
> > Signed-off-by: Rajat Jain 
> > ---
> > v5: Remove the gerrit id from commit log
> > v4: Rename file, remove dummy arg, 1 conditional per if statement,
> > simplify init / exit calls..
> > v3: (first version of *this* patch -to go with rest of v3 patchset)
> > v2: (does not exist)
> > v1: (does not exist)
> >
> >  drivers/platform/x86/Makefile |  2 +-
> >  .../platform/x86/intel_pmc_core_plat_drv.c| 60 +++
> >  2 files changed, 61 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/platform/x86/intel_pmc_core_plat_drv.c
> >
> > diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
> > index 86cb76677bc8..7041a88c34c7 100644
> > --- a/drivers/platform/x86/Makefile
> > +++ b/drivers/platform/x86/Makefile
> > @@ -89,7 +89,7 @@ obj-$(CONFIG_INTEL_BXTWC_PMIC_TMU)+= intel_bxtwc_tmu.o
> >  obj-$(CONFIG_INTEL_TELEMETRY)  += intel_telemetry_core.o \
> >intel_telemetry_pltdrv.o \
> >intel_telemetry_debugfs.o
> > -obj-$(CONFIG_INTEL_PMC_CORE)+= intel_pmc_core.o
> > +obj-$(CONFIG_INTEL_PMC_CORE)+= intel_pmc_core.o 
> > intel_pmc_core_plat_drv.o
> >  obj-$(CONFIG_PMC_ATOM) += pmc_atom.o
> >  obj-$(CONFIG_MLX_PLATFORM) += mlx-platform.o
> >  obj-$(CONFIG_INTEL_TURBO_MAX_3) += intel_turbo_max_3.o
> > diff --git a/drivers/platform/x86/intel_pmc_core_plat_drv.c 
> > b/drivers/platform/x86/intel_pmc_core_plat_drv.c
> > new file mode 100644
> > index ..7f5af5bd7f1f
> > --- /dev/null
> > +++ b/drivers/platform/x86/intel_pmc_core_plat_drv.c
> > @@ -0,0 +1,60 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +/*
> > + * Intel PMC Core platform init
> > + * Copyright (c) 2019, Google Inc.
> > + * Author - Rajat Jain
> > + *
> > + * This code instantiates platform devices for intel_pmc_core driver, only
> > + * on supported platforms that may not have the ACPI devices in the ACPI 
> > tables.
> > + * No new platforms should be added here, because we expect that new 
> > platforms
> > + * should all have the ACPI device, which is the preferred way of 
> > enumeration.
> > + */
> > +
> > +#include 
> > +#include 
> > +
> > +#include 
> > +#include 
> > +
> > +static struct platform_device pmc_core_device = {
> > +   .name = "pmc_core",
>
> This name is too generic. Better to use the current module name, i.e.
> "intel_pmc_core".

done


>
> However, see my comment to the first patch in the series.
>
> > +};
> > +
> > +/*
> > + * intel_pmc_core_platform_ids is the list of platforms where we want to
> > + * instantiate the platform_device if not already instantiated. This is
> > + * different than intel_pmc_core_ids in intel_pmc_core.c which is the
> > + * list of platforms that the driver supports for pmc_core device. The
> > + * other list may grow, but this list should not.
> > + */
> > +static const struct x86_cpu_id intel_pmc_core_platform_ids[] = {
> > +   INTEL_CPU_FAM6(SKYLAKE_MOBILE, pmc_core_device),
> > +   INTEL_CPU_FAM6(SKYLAKE_DESKTOP, pmc_core_device),
> > +   INTEL_CPU_FAM6(KABYLAKE_MOBILE, pmc_core_device),
> > +   INTEL_CPU_FAM6(KABYLAKE_DESKTOP, pmc_core_device),
> > +   INTEL_CPU_FAM6(CANNONLAKE_MOBILE, pmc_core_device),
> > +   INTEL_CPU_FAM6(ICELAKE_MOBILE, pmc_core_device),
> > +   {}
> > +};
> > +MODULE_DEVICE_TABLE(x86cpu, intel_pmc_core_platform_ids);
> > +
> > +static int __init pmc_core_platform_init(void)
> > +{
> > +   /* Skip creating the platform device if ACPI already has a device */
> > +   if (acpi_dev_present("INT33A1", NULL, -1))
> > +   return -ENODEV;
> > +
> > +   if (!x86_match_cpu(intel_pmc_core_platform_ids))
> > +   return -ENODEV;
> > +
> > +   return platform_device_register(_core_device);
> > +}
> > +
> > +static void __exit pmc_core_platform_exit(void)
> > +{
> > +   platform_device_unregister(_core_device);
> > +}
> > +
> > +module_init(pmc_core_platform_init);
> > +module_exit(pmc_core_platform_exit);
> > --
> > 2.21.0.392.gf8f6787159e-goog
> >
>
>
> --
> With Best Regards,
> Andy Shevchenko


Re: [PATCH v5 2/3] platform/x86: intel_pmc_core: Allow to dump debug registers on S0ix failure

2019-04-17 Thread Rajat Jain
On Thu, Apr 11, 2019 at 6:40 AM Andy Shevchenko
 wrote:
>
> On Thu, Apr 11, 2019 at 3:38 AM Rajat Jain  wrote:
> >
> > Add a module parameter which when enabled, will check on resume, if the
> > last S0ix attempt was successful. If not, the driver would warn and provide
> > helpful debug information (which gets latched during the failed suspend
> > attempt) to debug the S0ix failure.
> >
> > This information is very useful to debug S0ix failures. Specially since
> > the latched debug information will be lost (over-written) if the system
> > attempts to go into runtime (or imminent) S0ix again after that failed
> > suspend attempt.
>
> > +static int pmc_core_suspend(struct device *dev)
> > +{
> > +   struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> > +
> > +   pmcdev->check_counters = false;
> > +
> > +   /* No warnings on S0ix failures */
> > +   if (!warn_on_s0ix_failures)
> > +   return 0;
> > +
> > +   /* Check if the syspend will actually use S0ix */
> > +   if (pm_suspend_via_firmware())
> > +   return 0;
> > +
> > +   /* Save PC10 and S0ix residency for checking later */
>
> > +   if (!rdmsrl_safe(MSR_PKG_C10_RESIDENCY, >pc10_counter) &&
> > +   !pmc_core_dev_state_get(pmcdev, >s0ix_counter))
>
> Split it.

done

>
> > +   pmcdev->check_counters = true;
> > +
> > +   return 0;
> > +}
> > +
> > +static inline bool pmc_core_is_pc10_failed(struct pmc_dev *pmcdev)
> > +{
> > +   u64 pc10_counter;
> > +
> > +   if (!rdmsrl_safe(MSR_PKG_C10_RESIDENCY, _counter) &&
> > +   pc10_counter == pmcdev->pc10_counter)
> > +   return true;
>
> Split this as well.

done
>
> > +
> > +   return false;
> > +}
> > +
> > +static inline bool pmc_core_is_s0ix_failed(struct pmc_dev *pmcdev)
> > +{
> > +   u64 s0ix_counter;
> > +
> > +   if (!pmc_core_dev_state_get(pmcdev, _counter) &&
> > +   s0ix_counter == pmcdev->s0ix_counter)
> > +   return true;
>
> And this.

done
>
> > +
> > +   return false;
> > +}
> > +
> > +static int pmc_core_resume(struct device *dev)
> > +{
> > +   struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> > +
> > +   if (!pmcdev->check_counters)
> > +   return 0;
> > +
> > +   if (pmc_core_is_pc10_failed(pmcdev)) {
> > +   dev_info(dev, "PC10 entry had failed (PC10 cnt=0x%llx)\n",
> > +pmcdev->pc10_counter);
> > +   } else if (pmc_core_is_s0ix_failed(pmcdev)) {
>
> > +
>
> Redundant.

I did not quite understand the comment, but I have restructured this
and I think this concerned will be addressed.

>
> > +   const struct pmc_bit_map **maps = 
> > pmcdev->map->slps0_dbg_maps;
> > +   const struct pmc_bit_map *map;
> > +   int offset = pmcdev->map->slps0_dbg_offset;
> > +   u32 data;
> > +
> > +   dev_warn(dev, "S0ix entry had failed (S0ix cnt=%llu)\n",
> > +pmcdev->s0ix_counter);
> > +   while (*maps) {
> > +   map = *maps;
> > +   data = pmc_core_reg_read(pmcdev, offset);
> > +   offset += 4;
> > +   while (map->name) {
> > +   dev_warn(dev, "SLP_S0_DBG: %-32s\tState: 
> > %s\n",
> > +map->name,
> > +data & map->bit_mask ? "Yes" : 
> > "No");
>
> > +   ++map;
>
> map++;

done
>
> > +   }
> > +   ++maps;
>
> maps++;

done
>
> > +   }
>
> This is quite noisy. You need to print only what is important. I don't
> think polluting dmesg with piles of these kind of messages is a good
> idea.
> Also, it is more likely should be done on debug level (except may be
> one or two messages with really important information).

Changed it to dev_dbg in my latest patch. I do not know if a subset of
this information will be helpful to Intel to debug S0ix failures. This
is something I'd like to defer to Rajneesh. I'd be happy to cut it
short if it can still get the info Intel needs to debug S0ix failures.




>
> > +   }
> > +   return 0;
> > +}
> > +
> > +#endif
>
> --
> With Best Regards,
> Andy Shevchenko


Re: [PATCH v5 1/3] platform/x86: intel_pmc_core: Convert to a platform_driver

2019-04-17 Thread Rajat Jain
On Thu, Apr 11, 2019 at 6:44 AM Andy Shevchenko
 wrote:
>
> On Thu, Apr 11, 2019 at 3:37 AM Rajat Jain  wrote:
> >
> > Convert the intel_pmc_core driver to a platform driver, and attach using
> > the ACPI enumeration method (via the ACPI device "INT33A1").
>
> > cpu_id = x86_match_cpu(intel_pmc_core_ids);
> > if (!cpu_id)
> > return -ENODEV;
> > @@ -888,28 +893,49 @@ static int __init pmc_core_probe(void)
> > mutex_init(>lock);
> > pmcdev->pmc_xram_read_bit = pmc_core_check_read_lock_bit();
> >
> > +   dmi_check_system(pmc_core_dmi_table);
> > +   platform_set_drvdata(pdev, pmcdev);
> > +
> > err = pmc_core_dbgfs_register(pmcdev);
> > if (err < 0) {
> > -   pr_warn(" debugfs register failed.\n");
> > +   dev_warn(>dev, "debugfs register failed.\n");
> > iounmap(pmcdev->regbase);
> > return err;
> > }
> >
> > -   dmi_check_system(pmc_core_dmi_table);
> > -   pr_info(" initialized\n");
>
> > +   dev_info(>dev, " initialized\n");
> > +   device_initialized = true;
>
> First you do something, then print it's done, and not other way around.

done

>
> > +
> > return 0;
> >  }
> > -module_init(pmc_core_probe)
> >
> > -static void __exit pmc_core_remove(void)
> > +static int pmc_core_remove(struct platform_device *pdev)
> >  {
> > -   struct pmc_dev *pmcdev = 
> > +   struct pmc_dev *pmcdev = platform_get_drvdata(pdev);
> >
> > +   platform_set_drvdata(pdev, NULL);
> > pmc_core_dbgfs_unregister(pmcdev);
> > mutex_destroy(>lock);
> > iounmap(pmcdev->regbase);
> > +   return 0;
> >  }
> > -module_exit(pmc_core_remove)
> > +
> > +static const struct acpi_device_id pmc_core_acpi_ids[] = {
> > +   {"INT33A1", 0}, /* _HID for Intel Power Engine, _CID PNP0D80*/
> > +   { }
> > +};
> > +MODULE_DEVICE_TABLE(acpi, pmc_core_acpi_ids);
> > +
> > +static struct platform_driver pmc_core_driver = {
> > +   .driver = {
> > +   .name = "pmc_core",
> > +   .acpi_match_table = ACPI_PTR(pmc_core_acpi_ids),
> > +   },
> > +   .probe = pmc_core_probe,
> > +   .remove = pmc_core_remove,
> > +};
> > +
> > +module_platform_driver(pmc_core_driver);
>
> So, this patch has a bisectability issue. After it AFAIU some
> platforms will not be able to enumerate the device.
>
> To avoid such you have to reconsider logic behind this conversion, i.e.
> 1. Split the driver to core part and current initialization mechanism
> 2. Add platform driver as a separate module.

done.


>
> --
> With Best Regards,
> Andy Shevchenko


Re: [PATCH v4 1/2] mm: refactor __vunmap() to avoid duplicated call to find_vm_area()

2019-04-17 Thread Roman Gushchin
On Wed, Apr 17, 2019 at 02:58:27PM -0700, Andrew Morton wrote:
> On Wed, 17 Apr 2019 12:40:01 -0700 Roman Gushchin  wrote:
> 
> > __vunmap() calls find_vm_area() twice without an obvious reason:
> > first directly to get the area pointer, second indirectly by calling
> > remove_vm_area(), which is again searching for the area.
> > 
> > To remove this redundancy, let's split remove_vm_area() into
> > __remove_vm_area(struct vmap_area *), which performs the actual area
> > removal, and remove_vm_area(const void *addr) wrapper, which can
> > be used everywhere, where it has been used before.
> > 
> > On my test setup, I've got 5-10% speed up on vfree()'ing 100
> > of 4-pages vmalloc blocks.
> > 
> > Perf report before:
> >   22.64%  cat  [kernel.vmlinux]  [k] free_pcppages_bulk
> >   10.30%  cat  [kernel.vmlinux]  [k] __vunmap
> >9.80%  cat  [kernel.vmlinux]  [k] find_vmap_area
> >8.11%  cat  [kernel.vmlinux]  [k] vunmap_page_range
> >4.20%  cat  [kernel.vmlinux]  [k] __slab_free
> >3.56%  cat  [kernel.vmlinux]  [k] __list_del_entry_valid
> >3.46%  cat  [kernel.vmlinux]  [k] smp_call_function_many
> >3.33%  cat  [kernel.vmlinux]  [k] kfree
> >3.32%  cat  [kernel.vmlinux]  [k] free_unref_page
> > 
> > Perf report after:
> >   23.01%  cat  [kernel.kallsyms]  [k] free_pcppages_bulk
> >9.46%  cat  [kernel.kallsyms]  [k] __vunmap
> >9.15%  cat  [kernel.kallsyms]  [k] vunmap_page_range
> >6.17%  cat  [kernel.kallsyms]  [k] __slab_free
> >5.61%  cat  [kernel.kallsyms]  [k] kfree
> >4.86%  cat  [kernel.kallsyms]  [k] bad_range
> >4.67%  cat  [kernel.kallsyms]  [k] free_unref_page_commit
> >4.24%  cat  [kernel.kallsyms]  [k] __list_del_entry_valid
> >3.68%  cat  [kernel.kallsyms]  [k] free_unref_page
> >3.65%  cat  [kernel.kallsyms]  [k] __list_add_valid
> >3.19%  cat  [kernel.kallsyms]  [k] __purge_vmap_area_lazy
> >3.10%  cat  [kernel.kallsyms]  [k] find_vmap_area
> >3.05%  cat  [kernel.kallsyms]  [k] rcu_cblist_dequeue
> > 
> > ...
> >
> > --- a/mm/vmalloc.c
> > +++ b/mm/vmalloc.c
> > @@ -2068,6 +2068,24 @@ struct vm_struct *find_vm_area(const void *addr)
> > return NULL;
> >  }
> >  
> > +static struct vm_struct *__remove_vm_area(struct vmap_area *va)
> > +{
> > +   struct vm_struct *vm = va->vm;
> > +
> > +   might_sleep();
> 
> Where might __remove_vm_area() sleep?
> 
> From a quick scan I'm only seeing vfree(), and that has the
> might_sleep_if(!in_interrupt()).
> 
> So perhaps we can remove this...

Agree. Here is the patch.

Thank you!

--

>From 4adf58e4d3ffe45a542156ca0bce3dc9f6679939 Mon Sep 17 00:00:00 2001
From: Roman Gushchin 
Date: Wed, 17 Apr 2019 15:55:49 -0700
Subject: [PATCH] mm: remove might_sleep() in __remove_vm_area()

__remove_vm_area() has a redundant might_sleep() call, which isn't
really required, because the only place it can sleep is vfree()
and it already contains might_sleep_if(!in_interrupt()).

Suggested-by: Andrew Morton 
Signed-off-by: Roman Gushchin 
---
 mm/vmalloc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 69a5673c4cd3..4a91acce4b5f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2079,8 +2079,6 @@ static struct vm_struct *__remove_vm_area(struct 
vmap_area *va)
 {
struct vm_struct *vm = va->vm;
 
-   might_sleep();
-
spin_lock(_area_lock);
va->vm = NULL;
va->flags &= ~VM_VM_AREA;
-- 
2.20.1



Re: linux-next: build failure after merge of the akpm-current tree

2019-04-17 Thread Stephen Rothwell
Hi Kees,

On Wed, 17 Apr 2019 17:28:39 -0500 Kees Cook  wrote:
>
> On Wed, Apr 17, 2019 at 5:22 PM Kees Cook  wrote:
> >
> > On Wed, Apr 17, 2019 at 1:53 AM Stephen Rothwell  
> > wrote:  
> > >
> > > Hi Andrew,
> > >
> > > After merging the akpm-current tree, today's linux-next build (arm
> > > multi_v7_defconfig) failed like this:
> > >
> > > fs/binfmt_elf.c: In function 'load_elf_binary':
> > > fs/binfmt_elf.c:1140:7: error: 'elf_interpreter' undeclared (first use in 
> > > this function); did you mean 'interpreter'?
> > >   if (!elf_interpreter)
> > >^~~
> > >interpreter  
> >
> > static int load_elf_binary(struct linux_binprm *bprm)
> > {
> > ...
> > char * elf_interpreter = NULL;
> >
> > This is _absolutely_ a valid variable.  

It was. However commit a34f642bccf1 from Andrew's tree changes its scope.

So there is nothing wrong with commit 3ebf0dd657ce, it is the incorrect
rebase of it on top of a34f642bccf1 that causes the build problem.

> > > Caused by commit
> > >
> > >   3ebf0dd657ce ("fs/binfmt_elf.c: move brk out of mmap when doing direct 
> > > loader exec")
> > >
> > > interacting with commit
> > >
> > >   a34f642bccf1 ("fs/binfmt_elf.c: free PT_INTERP filename ASAP")
> > >
> > > I have applied the following patch for today.
> > >
> > > From: Stephen Rothwell 
> > > Date: Wed, 17 Apr 2019 16:48:29 +1000
> > > Subject: [PATCH] fix "fs/binfmt_elf.c: move brk out of mmap when doing 
> > > direct loader exec"
> > >
> > > Signed-off-by: Stephen Rothwell 
> > > ---
> > >  fs/binfmt_elf.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> > > index b3bbe6bca499..fe5668a1bbaa 100644
> > > --- a/fs/binfmt_elf.c
> > > +++ b/fs/binfmt_elf.c
> > > @@ -1137,7 +1137,7 @@ static int load_elf_binary(struct linux_binprm 
> > > *bprm)
> > >  * collide early with the stack growing down), and into the unused
> > >  * ELF_ET_DYN_BASE region.
> > >  */
> > > -   if (!elf_interpreter)
> > > +   if (!interpreter)  
> >
> > No, this is very wrong and will, I think, cause all PIE binaries to fail to 
> > run.  
> 
> I may be wrong: I think this will cause all static binaries to see
> their brk moved very unexpectedly. All static PIE binaries will fail?

Are you sure that elf_interpreter == NULL is not equivalent to
interpreter == NULL by this point in the code?  Earlier if
elf_intpreter is not NULL, we have set interpreter (using open_exec)
and errored out if that fails.
-- 
Cheers,
Stephen Rothwell


pgpATuAIcs8I4.pgp
Description: OpenPGP digital signature


[PATCH v6 2/3] platform/x86: intel_pmc_core: Allow to dump debug registers on S0ix failure

2019-04-17 Thread Rajat Jain
Add a module parameter which when enabled, will check on resume, if the
last S0ix attempt was successful. If not, the driver would warn and provide
helpful debug information (which gets latched during the failed suspend
attempt) to debug the S0ix failure.

This information is very useful to debug S0ix failures. Specially since
the latched debug information will be lost (over-written) if the system
attempts to go into runtime (or imminent) S0ix again after that failed
suspend attempt.

Signed-off-by: Rajat Jain 
---
v6: Some code restructuring, single conditional per if stmt.
Change from dev_warn to dev_dbg.
v5: Remove the gerrit id from commit log
v4: Use 1 condition per if statement, rename some functions.
v3: No changes
v2: Use pm_suspend_via_firmware() to enable the check only for S0ix

 drivers/platform/x86/intel_pmc_core.c | 105 ++
 drivers/platform/x86/intel_pmc_core.h |   7 ++
 2 files changed, 112 insertions(+)

diff --git a/drivers/platform/x86/intel_pmc_core.c 
b/drivers/platform/x86/intel_pmc_core.c
index 153e79cc4d5b..1d902230ba61 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -919,9 +920,113 @@ static int pmc_core_remove(struct platform_device *pdev)
return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+
+static bool warn_on_s0ix_failures;
+module_param(warn_on_s0ix_failures, bool, 0644);
+MODULE_PARM_DESC(warn_on_s0ix_failures, "Check and warn for S0ix failures");
+
+static int pmc_core_suspend(struct device *dev)
+{
+   struct pmc_dev *pmcdev = dev_get_drvdata(dev);
+
+   pmcdev->check_counters = false;
+
+   /* No warnings on S0ix failures */
+   if (!warn_on_s0ix_failures)
+   return 0;
+
+   /* Check if the syspend will actually use S0ix */
+   if (pm_suspend_via_firmware())
+   return 0;
+
+   /* Save PC10 residency for checking later */
+   if (rdmsrl_safe(MSR_PKG_C10_RESIDENCY, >pc10_counter))
+   return -EIO;
+
+   /* Save S0ix residency for checking later */
+   if (pmc_core_dev_state_get(pmcdev, >s0ix_counter))
+   return -EIO;
+
+   pmcdev->check_counters = true;
+   return 0;
+}
+
+static inline bool pmc_core_is_pc10_failed(struct pmc_dev *pmcdev)
+{
+   u64 pc10_counter;
+
+   if (rdmsrl_safe(MSR_PKG_C10_RESIDENCY, _counter))
+   return false;
+
+   if (pc10_counter == pmcdev->pc10_counter)
+   return true;
+
+   return false;
+}
+
+static inline bool pmc_core_is_s0ix_failed(struct pmc_dev *pmcdev)
+{
+   u64 s0ix_counter;
+
+   if (pmc_core_dev_state_get(pmcdev, _counter))
+   return false;
+
+   if (s0ix_counter == pmcdev->s0ix_counter)
+   return true;
+
+   return false;
+}
+
+static int pmc_core_resume(struct device *dev)
+{
+   struct pmc_dev *pmcdev = dev_get_drvdata(dev);
+   const struct pmc_bit_map **maps = pmcdev->map->slps0_dbg_maps;
+   int offset = pmcdev->map->slps0_dbg_offset;
+   const struct pmc_bit_map *map;
+   u32 data;
+
+   if (!pmcdev->check_counters)
+   return 0;
+
+   if (!pmc_core_is_s0ix_failed(pmcdev))
+   return 0;
+
+   if (pmc_core_is_pc10_failed(pmcdev)) {
+   /* S0ix failed because of PC10 entry failure */
+   dev_info(dev, "CPU did not enter PC10!!! (PC10 cnt=0x%llx)\n",
+pmcdev->pc10_counter);
+   return 0;
+   }
+
+   /* The real interesting case - S0ix failed - lets ask PMC why. */
+   dev_warn(dev, "CPU did not enter SLP_S0!!! (S0ix cnt=%llu)\n",
+pmcdev->s0ix_counter);
+   while (*maps) {
+   map = *maps;
+   data = pmc_core_reg_read(pmcdev, offset);
+   offset += 4;
+   while (map->name) {
+   dev_dbg(dev, "SLP_S0_DBG: %-32s\tState: %s\n",
+   map->name,
+   data & map->bit_mask ? "Yes" : "No");
+   map++;
+   }
+   maps++;
+   }
+   return 0;
+}
+
+#endif
+
+static const struct dev_pm_ops pmc_core_pm_ops = {
+   SET_LATE_SYSTEM_SLEEP_PM_OPS(pmc_core_suspend, pmc_core_resume)
+};
+
 static struct platform_driver pmc_core_driver = {
.driver = {
.name = "intel_pmc_core",
+   .pm = _core_pm_ops,
},
.probe = pmc_core_probe,
.remove = pmc_core_remove,
diff --git a/drivers/platform/x86/intel_pmc_core.h 
b/drivers/platform/x86/intel_pmc_core.h
index 88d9c0653a5f..fdee5772e532 100644
--- a/drivers/platform/x86/intel_pmc_core.h
+++ b/drivers/platform/x86/intel_pmc_core.h
@@ -241,6 +241,9 @@ struct pmc_reg_map {
  * @pmc_xram_read_bit: flag to indicate whether PMC XRAM shadow registers
  * used to read MPHY PG and PLL 

[PATCH v6 3/3] platform/x86: intel_pmc_core: Attach using APCI HID "INT33A1"

2019-04-17 Thread Rajat Jain
Most modern platforms already have the ACPI device "INT33A1" that could
be used to attach to the driver. Switch the driver to using that and
thus make the intel_pmc_core.c a pure platform_driver.

Some of the legacy platforms though, may still not have this ACPI device
in their ACPI tables. Thus for such platforms, move the code to manually
instantiate a platform_device into a new file of its own. This would
instantiate the intel_pmc_core platform device and thus attach to
the driver, if the ACPI device for the same ("INT33A1") is not present
in a system where it should be. This was discussed here:
https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1966991.html

Signed-off-by: Rajat Jain 
---
v6: Squash some of the changes of first patch into this patch to work
around the bisectability issue.
v5: Remove the gerrit id from commit log
v4: Rename file, remove dummy arg, 1 conditional per if statement,
simplify init / exit calls..
v3: (first version of *this* patch -to go with rest of v3 patchset)
v2: (does not exist)
v1: (does not exist)

 drivers/platform/x86/Makefile |  2 +-
 drivers/platform/x86/intel_pmc_core.c | 40 +++--
 .../platform/x86/intel_pmc_core_plat_drv.c| 60 +++
 3 files changed, 69 insertions(+), 33 deletions(-)
 create mode 100644 drivers/platform/x86/intel_pmc_core_plat_drv.c

diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 86cb76677bc8..7041a88c34c7 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -89,7 +89,7 @@ obj-$(CONFIG_INTEL_BXTWC_PMIC_TMU)+= intel_bxtwc_tmu.o
 obj-$(CONFIG_INTEL_TELEMETRY)  += intel_telemetry_core.o \
   intel_telemetry_pltdrv.o \
   intel_telemetry_debugfs.o
-obj-$(CONFIG_INTEL_PMC_CORE)+= intel_pmc_core.o
+obj-$(CONFIG_INTEL_PMC_CORE)+= intel_pmc_core.o intel_pmc_core_plat_drv.o
 obj-$(CONFIG_PMC_ATOM) += pmc_atom.o
 obj-$(CONFIG_MLX_PLATFORM) += mlx-platform.o
 obj-$(CONFIG_INTEL_TURBO_MAX_3) += intel_turbo_max_3.o
diff --git a/drivers/platform/x86/intel_pmc_core.c 
b/drivers/platform/x86/intel_pmc_core.c
index 1d902230ba61..f20d08ad39ea 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -1023,47 +1023,23 @@ static const struct dev_pm_ops pmc_core_pm_ops = {
SET_LATE_SYSTEM_SLEEP_PM_OPS(pmc_core_suspend, pmc_core_resume)
 };
 
+static const struct acpi_device_id pmc_core_acpi_ids[] = {
+   {"INT33A1", 0}, /* _HID for Intel Power Engine, _CID PNP0D80*/
+   { }
+};
+MODULE_DEVICE_TABLE(acpi, pmc_core_acpi_ids);
+
 static struct platform_driver pmc_core_driver = {
.driver = {
.name = "intel_pmc_core",
+   .acpi_match_table = ACPI_PTR(pmc_core_acpi_ids),
.pm = _core_pm_ops,
},
.probe = pmc_core_probe,
.remove = pmc_core_remove,
 };
 
-static struct platform_device pmc_core_device = {
-   .name = "intel_pmc_core",
-};
-
-static int __init pmc_core_init(void)
-{
-   int ret;
-
-   if (!x86_match_cpu(intel_pmc_core_ids))
-   return -ENODEV;
-
-   ret = platform_driver_register(_core_driver);
-   if (ret)
-   return ret;
-
-   ret = platform_device_register(_core_device);
-   if (ret) {
-   platform_driver_unregister(_core_driver);
-   return ret;
-   }
-
-   return 0;
-}
-
-static void __exit pmc_core_exit(void)
-{
-   platform_device_unregister(_core_device);
-   platform_driver_unregister(_core_driver);
-}
-
-module_init(pmc_core_init)
-module_exit(pmc_core_exit)
+module_platform_driver(pmc_core_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Intel PMC Core Driver");
diff --git a/drivers/platform/x86/intel_pmc_core_plat_drv.c 
b/drivers/platform/x86/intel_pmc_core_plat_drv.c
new file mode 100644
index ..20be3eaeb722
--- /dev/null
+++ b/drivers/platform/x86/intel_pmc_core_plat_drv.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Intel PMC Core platform init
+ * Copyright (c) 2019, Google Inc.
+ * Author - Rajat Jain
+ *
+ * This code instantiates platform devices for intel_pmc_core driver, only
+ * on supported platforms that may not have the ACPI devices in the ACPI 
tables.
+ * No new platforms should be added here, because we expect that new platforms
+ * should all have the ACPI device, which is the preferred way of enumeration.
+ */
+
+#include 
+#include 
+
+#include 
+#include 
+
+static struct platform_device pmc_core_device = {
+   .name = "intel_pmc_core",
+};
+
+/*
+ * intel_pmc_core_platform_ids is the list of platforms where we want to
+ * instantiate the platform_device if not already instantiated. This is
+ * different than intel_pmc_core_ids in intel_pmc_core.c which is the
+ * list of platforms that the driver supports for pmc_core device. The
+ * other list may 

[PATCH v6 1/3] platform/x86: intel_pmc_core: Convert to a platform_driver

2019-04-17 Thread Rajat Jain
Convert the intel_pmc_core driver to a platform driver. There is no
functional change to the driver, or to the way the devices are
instantiated.

Signed-off-by: Rajat Jain 
---
v6: Let the way devices are instantiated exactly the same as before this
patch (so no functional change other than converting to a platform
device). Use "intel_pmc_core" platform device (no ACPI
instantiation)
v5: Remove the gerrit ID from commit log
v4: put back the x86_match_cpu() method.
v3: Don't instantiate the platform_device. Use ACPI enumeration.
v2: Rephrase the commit log. No code changes.

 drivers/platform/x86/intel_pmc_core.c | 65 +++
 1 file changed, 57 insertions(+), 8 deletions(-)

diff --git a/drivers/platform/x86/intel_pmc_core.c 
b/drivers/platform/x86/intel_pmc_core.c
index 9908d233305e..153e79cc4d5b 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -854,13 +855,17 @@ static const struct dmi_system_id pmc_core_dmi_table[]  = 
{
{}
 };
 
-static int __init pmc_core_probe(void)
+static int pmc_core_probe(struct platform_device *pdev)
 {
+   static bool device_initialized;
struct pmc_dev *pmcdev = 
const struct x86_cpu_id *cpu_id;
u64 slp_s0_addr;
int err;
 
+   if (device_initialized)
+   return -ENODEV;
+
cpu_id = x86_match_cpu(intel_pmc_core_ids);
if (!cpu_id)
return -ENODEV;
@@ -886,30 +891,74 @@ static int __init pmc_core_probe(void)
return -ENOMEM;
 
mutex_init(>lock);
+   platform_set_drvdata(pdev, pmcdev);
pmcdev->pmc_xram_read_bit = pmc_core_check_read_lock_bit();
+   dmi_check_system(pmc_core_dmi_table);
 
err = pmc_core_dbgfs_register(pmcdev);
if (err < 0) {
-   pr_warn(" debugfs register failed.\n");
+   dev_warn(>dev, "debugfs register failed.\n");
iounmap(pmcdev->regbase);
return err;
}
 
-   dmi_check_system(pmc_core_dmi_table);
-   pr_info(" initialized\n");
+   device_initialized = true;
+   dev_info(>dev, " initialized\n");
+
return 0;
 }
-module_init(pmc_core_probe)
 
-static void __exit pmc_core_remove(void)
+static int pmc_core_remove(struct platform_device *pdev)
 {
-   struct pmc_dev *pmcdev = 
+   struct pmc_dev *pmcdev = platform_get_drvdata(pdev);
 
pmc_core_dbgfs_unregister(pmcdev);
+   platform_set_drvdata(pdev, NULL);
mutex_destroy(>lock);
iounmap(pmcdev->regbase);
+   return 0;
+}
+
+static struct platform_driver pmc_core_driver = {
+   .driver = {
+   .name = "intel_pmc_core",
+   },
+   .probe = pmc_core_probe,
+   .remove = pmc_core_remove,
+};
+
+static struct platform_device pmc_core_device = {
+   .name = "intel_pmc_core",
+};
+
+static int __init pmc_core_init(void)
+{
+   int ret;
+
+   if (!x86_match_cpu(intel_pmc_core_ids))
+   return -ENODEV;
+
+   ret = platform_driver_register(_core_driver);
+   if (ret)
+   return ret;
+
+   ret = platform_device_register(_core_device);
+   if (ret) {
+   platform_driver_unregister(_core_driver);
+   return ret;
+   }
+
+   return 0;
+}
+
+static void __exit pmc_core_exit(void)
+{
+   platform_device_unregister(_core_device);
+   platform_driver_unregister(_core_driver);
 }
-module_exit(pmc_core_remove)
+
+module_init(pmc_core_init)
+module_exit(pmc_core_exit)
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Intel PMC Core Driver");
-- 
2.21.0.392.gf8f6787159e-goog



Re: linux-next: build failure after merge of the akpm-current tree

2019-04-17 Thread Kees Cook
On Wed, Apr 17, 2019 at 5:22 PM Kees Cook  wrote:
>
> On Wed, Apr 17, 2019 at 1:53 AM Stephen Rothwell  
> wrote:
> >
> > Hi Andrew,
> >
> > After merging the akpm-current tree, today's linux-next build (arm
> > multi_v7_defconfig) failed like this:
> >
> > fs/binfmt_elf.c: In function 'load_elf_binary':
> > fs/binfmt_elf.c:1140:7: error: 'elf_interpreter' undeclared (first use in 
> > this function); did you mean 'interpreter'?
> >   if (!elf_interpreter)
> >^~~
> >interpreter
>
> static int load_elf_binary(struct linux_binprm *bprm)
> {
> ...
> char * elf_interpreter = NULL;
>
> This is _absolutely_ a valid variable.

I saw a 0day report[1] as well on MIPS for this. Neither have I been
able to reproduce, though. I'm wondering if, due to the misplaced
kfree() that has existed for a while, if some kind of weird scoping is
happening.

What compiler are you using?

[1] https://lists.01.org/pipermail/kbuild-all/2019-April/060058.html

>
> >
> >
> > Caused by commit
> >
> >   3ebf0dd657ce ("fs/binfmt_elf.c: move brk out of mmap when doing direct 
> > loader exec")
> >
> > interacting with commit
> >
> >   a34f642bccf1 ("fs/binfmt_elf.c: free PT_INTERP filename ASAP")
> >
> > I have applied the following patch for today.
> >
> > From: Stephen Rothwell 
> > Date: Wed, 17 Apr 2019 16:48:29 +1000
> > Subject: [PATCH] fix "fs/binfmt_elf.c: move brk out of mmap when doing 
> > direct loader exec"
> >
> > Signed-off-by: Stephen Rothwell 
> > ---
> >  fs/binfmt_elf.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> > index b3bbe6bca499..fe5668a1bbaa 100644
> > --- a/fs/binfmt_elf.c
> > +++ b/fs/binfmt_elf.c
> > @@ -1137,7 +1137,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
> >  * collide early with the stack growing down), and into the unused
> >  * ELF_ET_DYN_BASE region.
> >  */
> > -   if (!elf_interpreter)
> > +   if (!interpreter)
>
> No, this is very wrong and will, I think, cause all PIE binaries to fail to 
> run.

I may be wrong: I think this will cause all static binaries to see
their brk moved very unexpectedly. All static PIE binaries will fail?





--
Kees Cook


Re: [PATCH v8 06/16] sched/core: uclamp: Extend sched_setattr() to support utilization clamping

2019-04-17 Thread Suren Baghdasaryan
On Tue, Apr 2, 2019 at 3:42 AM Patrick Bellasi  wrote:
>
> The SCHED_DEADLINE scheduling class provides an advanced and formal
> model to define tasks requirements that can translate into proper
> decisions for both task placements and frequencies selections. Other
> classes have a more simplified model based on the POSIX concept of
> priorities.
>
> Such a simple priority based model however does not allow to exploit
> most advanced features of the Linux scheduler like, for example, driving
> frequencies selection via the schedutil cpufreq governor. However, also
> for non SCHED_DEADLINE tasks, it's still interesting to define tasks
> properties to support scheduler decisions.
>
> Utilization clamping exposes to user-space a new set of per-task
> attributes the scheduler can use as hints about the expected/required
> utilization for a task. This allows to implement a "proactive" per-task
> frequency control policy, a more advanced policy than the current one
> based just on "passive" measured task utilization. For example, it's
> possible to boost interactive tasks (e.g. to get better performance) or
> cap background tasks (e.g. to be more energy/thermal efficient).
>
> Introduce a new API to set utilization clamping values for a specified
> task by extending sched_setattr(), a syscall which already allows to
> define task specific properties for different scheduling classes. A new
> pair of attributes allows to specify a minimum and maximum utilization
> the scheduler can consider for a task.
>
> Do that by validating the required clamp values before and then applying
> the required changes using _the_ same pattern already in use for
> __setscheduler(). This ensures that the task is re-enqueued with the new
> clamp values.
>
> Do not allow to change sched class specific params and non class
> specific params (i.e. clamp values) at the same time.  This keeps things
> simple and still works for the most common cases since we are usually
> interested in just one of the two actions.

Sorry, I can't find where you are checking to eliminate the
possibility of simultaneous changes to both sched class specific
params and non class specific params... Am I too tired or they are
indeed missing?

>
> Signed-off-by: Patrick Bellasi 
> Cc: Ingo Molnar 
> Cc: Peter Zijlstra 
>
> ---
> Changes in v8:
>  Others:
>  - using p->uclamp_req to track clamp values "requested" from userspace
> ---
>  include/linux/sched.h|  9 
>  include/uapi/linux/sched.h   | 12 -
>  include/uapi/linux/sched/types.h | 66 
>  kernel/sched/core.c  | 87 +++-
>  4 files changed, 162 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index d8491954e2e1..c2b81a84985b 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -585,6 +585,7 @@ struct sched_dl_entity {
>   * @value: clamp value "assigned" to a se
>   * @bucket_id: bucket index corresponding to the "assigned" value
>   * @active:the se is currently refcounted in a rq's bucket
> + * @user_defined:  the requested clamp value comes from user-space
>   *
>   * The bucket_id is the index of the clamp bucket matching the clamp value
>   * which is pre-computed and stored to avoid expensive integer divisions from
> @@ -594,11 +595,19 @@ struct sched_dl_entity {
>   * which can be different from the clamp value "requested" from user-space.
>   * This allows to know a task is refcounted in the rq's bucket corresponding
>   * to the "effective" bucket_id.
> + *
> + * The user_defined bit is set whenever a task has got a task-specific clamp
> + * value requested from userspace, i.e. the system defaults apply to this 
> task
> + * just as a restriction. This allows to relax default clamps when a less
> + * restrictive task-specific value has been requested, thus allowing to
> + * implement a "nice" semantic. For example, a task running with a 20%
> + * default boost can still drop its own boosting to 0%.
>   */
>  struct uclamp_se {
> unsigned int value  : bits_per(SCHED_CAPACITY_SCALE);
> unsigned int bucket_id  : bits_per(UCLAMP_BUCKETS);
> unsigned int active : 1;
> +   unsigned int user_defined   : 1;
>  };
>  #endif /* CONFIG_UCLAMP_TASK */
>
> diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> index 075c610adf45..d2c65617a4a4 100644
> --- a/include/uapi/linux/sched.h
> +++ b/include/uapi/linux/sched.h
> @@ -53,10 +53,20 @@
>  #define SCHED_FLAG_RECLAIM 0x02
>  #define SCHED_FLAG_DL_OVERRUN  0x04
>  #define SCHED_FLAG_KEEP_POLICY 0x08
> +#define SCHED_FLAG_KEEP_PARAMS 0x10
> +#define SCHED_FLAG_UTIL_CLAMP_MIN  0x20
> +#define SCHED_FLAG_UTIL_CLAMP_MAX  0x40
> +
> +#define SCHED_FLAG_KEEP_ALL(SCHED_FLAG_KEEP_POLICY | \
> +SCHED_FLAG_KEEP_PARAMS)
> +
> 

[PATCH v5] gcov: Clang support

2019-04-17 Thread Tri Vo
From: Greg Hackmann 

LLVM uses profiling data that's deliberately similar to GCC, but has a very
different way of exporting that data.  LLVM calls llvm_gcov_init() once per
module, and provides a couple of callbacks that we can use to ask for more
data.

We care about the "writeout" callback, which in turn calls back into
compiler-rt/this module to dump all the gathered coverage data to disk:

   llvm_gcda_start_file()
 llvm_gcda_emit_function()
 llvm_gcda_emit_arcs()
 llvm_gcda_emit_function()
 llvm_gcda_emit_arcs()
 [... repeats for each function ...]
   llvm_gcda_summary_info()
   llvm_gcda_end_file()

This design is much more stateless and unstructured than gcc's, and is
intended to run at process exit.  This forces us to keep some local state
about which module we're dealing with at the moment.  On the other hand, it
also means we don't depend as much on how LLVM represents profiling data
internally.

See LLVM's lib/Transforms/Instrumentation/GCOVProfiling.cpp for more
details on how this works, particularly GCOVProfiler::emitProfileArcs(),
GCOVProfiler::insertCounterWriteout(), and GCOVProfiler::insertFlush().

Co-authored-by: Nick Desaulniers 
Co-authored-by: Tri Vo 
Signed-off-by: Greg Hackmann 
Signed-off-by: Nick Desaulniers 
Signed-off-by: Tri Vo 
Tested-by: Trilok Soni 
Tested-by: Prasad Sodagudi 
Tested-by: Tri Vo 
Tested-by: Daniel Mentz 
Tested-by: Petri Gynther 
Reviewed-by: Peter Oberparleiter 
---
v5:
- Same as v4. Build error when !CONFIG_MODULES was fixed by another commit
  41e72eeff32c ("module: add stubs for within_module functions")

 kernel/gcov/Kconfig   |   3 +-
 kernel/gcov/Makefile  |   1 +
 kernel/gcov/base.c|   2 +-
 kernel/gcov/clang.c   | 586 ++
 kernel/gcov/gcc_3_4.c |  12 +
 kernel/gcov/gcc_4_7.c |  12 +
 kernel/gcov/gcov.h|   2 +
 7 files changed, 616 insertions(+), 2 deletions(-)
 create mode 100644 kernel/gcov/clang.c

diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 1e3823fa799b..f71c1adcff31 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -53,6 +53,7 @@ config GCOV_PROFILE_ALL
 choice
prompt "Specify GCOV format"
depends on GCOV_KERNEL
+   depends on CC_IS_GCC
---help---
The gcov format is usually determined by the GCC version, and the
default is chosen according to your GCC version. However, there are
@@ -62,7 +63,7 @@ choice

 config GCOV_FORMAT_3_4
bool "GCC 3.4 format"
-   depends on CC_IS_GCC && GCC_VERSION < 40700
+   depends on GCC_VERSION < 40700
---help---
Select this option to use the format defined by GCC 3.4.

diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 45431ed679d1..d66a74b0f100 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -4,3 +4,4 @@ ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
 obj-y := base.o fs.o
 obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_base.o gcc_3_4.o
 obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_base.o gcc_4_7.o
+obj-$(CONFIG_CC_IS_CLANG) += clang.o
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index 799d42072727..0ffe9f194080 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c
@@ -64,7 +64,7 @@ static int gcov_module_notifier(struct notifier_block *nb, 
unsigned long event,

/* Remove entries located in module from linked list. */
while ((info = gcov_info_next(info))) {
-   if (within_module((unsigned long)info, mod)) {
+   if (gcov_info_within_module(info, mod)) {
gcov_info_unlink(prev, info);
if (gcov_events_enabled)
gcov_event(GCOV_REMOVE, info);
diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c
new file mode 100644
index ..125c50397ba2
--- /dev/null
+++ b/kernel/gcov/clang.c
@@ -0,0 +1,586 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Google, Inc.
+ * modified from kernel/gcov/gcc_4_7.c
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ *
+ * LLVM uses profiling data that's deliberately similar to GCC, but has a
+ * very different way of exporting that data.  LLVM calls llvm_gcov_init() once
+ * per module, and provides a couple of callbacks that we can use to ask for
+ * more data.
+ *
+ * We care about the "writeout" callback, which in turn calls back into
+ * compiler-rt/this module to dump all the gathered coverage data to disk:
+ *
+ *llvm_gcda_start_file()
+ *  llvm_gcda_emit_function()
+ *  llvm_gcda_emit_arcs()
+ *  

  1   2   3   4   5   6   7   8   9   >