[PATCH] powerpc/mm/radix: Fix checkstops caused by invalid tlbiel

2018-04-11 Thread Michael Ellerman
In tlbiel_radix_set_isa300() we use the PPC_TLBIEL() macro to
construct tlbiel instructions. The instruction takes 5 fields, two of
which are registers, and the others are constants. But because it's
constructed with inline asm the compiler doesn't know that.

We got the constraint wrong on the 'r' field: using "r" tells the
compiler to put the value in a register, so the value we then get in
the macro is the *register number*, not the value of the field.

That means when we mask the register number with 0x1 we get 0 or 1
depending on which register the compiler happens to put the constant
in, eg:

  li  r10,1
  tlbiel  r8,r9,2,0,0

  li  r7,1
  tlbiel  r10,r6,0,0,1

If we're unlucky we might generate an invalid instruction form, for
example RIC=0, PRS=1 and R=0 (tlbiel r8,r7,0,1,0). This has been
observed to cause machine checks:

  Oops: Machine check, sig: 7 [#1]
  CPU: 24 PID: 0 Comm: swapper
  NIP:  000385f4 LR: 0100ed00 CTR: 007f
  REGS: c110bb40 TRAP: 0200
  MSR:  90201003   CR: 4800  XER: 2004
  CFAR: 000385d0 DAR: 1c00 DSISR: 0200 SOFTE: 1

If the machine check happens early in boot while we have MSR_ME=0 it
will escalate into a checkstop and kill the box entirely.

To fix it we could change the inline asm constraint to "i" which
tells the compiler the value is a constant. But a better fix is to just
pass a literal 1 into the macro, which bypasses any problems with inline
asm constraints.

Fixes: d4748276ae14 ("powerpc/64s: Improve local TLB flush for boot and MCE on POWER9")
Cc: sta...@vger.kernel.org # v4.16+
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/mm/tlb-radix.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 2fba6170ab3f..a5d7309c2d05 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -33,13 +33,12 @@ static inline void tlbiel_radix_set_isa300(unsigned int 
set, unsigned int is,
 {
unsigned long rb;
unsigned long rs;
-   unsigned int r = 1; /* radix format */
 
rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
 
-   asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
-: : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
+   asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
+: : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
 : "memory");
 }
 
-- 
2.14.1



Re: [PATCH v2 2/2] powerpc/fadump: Do not use hugepages when fadump is active

2018-04-11 Thread Mahesh Jagannath Salgaonkar
On 04/10/2018 07:11 PM, Hari Bathini wrote:
> FADump capture kernel boots in restricted memory environment preserving
> the context of previous kernel to save vmcore. Supporting hugepages in
> such environment makes things unnecessarily complicated, as hugepages
> need memory set aside for them. This means most of the capture kernel's
> memory is used in supporting hugepages. In most cases, this results in
> out-of-memory issues while booting FADump capture kernel. But hugepages
> are not of much use in capture kernel whose only job is to save vmcore.
> So, disabling hugepages support, when fadump is active, is a reliable
> solution for the out of memory issues. Introducing a flag variable to
> disable HugeTLB support when fadump is active.
> 
> Signed-off-by: Hari Bathini 
> ---
> 
> Changes in v2:
> * Introduce a hugetlb_disabled flag to enable/disable hugepage support &
>   use that flag to disable hugepage support when fadump is active.

Looks good to me.

Reviewed-by: Mahesh Salgaonkar 

> 
> 
>  arch/powerpc/include/asm/page.h |    1 +
>  arch/powerpc/kernel/fadump.c    |    8 ++++++++
>  arch/powerpc/mm/hash_utils_64.c |    6 ++++--
>  arch/powerpc/mm/hugetlbpage.c   |    7 +++++++
>  4 files changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
> index 8da5d4c..40aee93 100644
> --- a/arch/powerpc/include/asm/page.h
> +++ b/arch/powerpc/include/asm/page.h
> @@ -39,6 +39,7 @@
> 
>  #ifndef __ASSEMBLY__
>  #ifdef CONFIG_HUGETLB_PAGE
> +extern bool hugetlb_disabled;
>  extern unsigned int HPAGE_SHIFT;
>  #else
>  #define HPAGE_SHIFT PAGE_SHIFT
> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
> index bea8d5f..8ceabef4 100644
> --- a/arch/powerpc/kernel/fadump.c
> +++ b/arch/powerpc/kernel/fadump.c
> @@ -402,6 +402,14 @@ int __init fadump_reserve_mem(void)
>   if (fw_dump.dump_active) {
>   pr_info("Firmware-assisted dump is active.\n");
> 
> +#ifdef CONFIG_HUGETLB_PAGE
> + /*
> +  * FADump capture kernel doesn't care much about hugepages.
> +  * In fact, handling hugepages in capture kernel is asking for
> +  * trouble. So, disable HugeTLB support when fadump is active.
> +  */
> + hugetlb_disabled = true;
> +#endif
>   /*
>* If last boot has crashed then reserve all the memory
>* above boot_memory_size so that we don't touch it until
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index cf290d41..eab8f1d 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -571,8 +571,10 @@ static void __init htab_scan_page_sizes(void)
>   }
> 
>  #ifdef CONFIG_HUGETLB_PAGE
> - /* Reserve 16G huge page memory sections for huge pages */
> - of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
> + if (!hugetlb_disabled) {
> + /* Reserve 16G huge page memory sections for huge pages */
> + of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
> + }
>  #endif /* CONFIG_HUGETLB_PAGE */
>  }
> 
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index 876da2b..18c080a 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -35,6 +35,8 @@
>  #define PAGE_SHIFT_16M   24
>  #define PAGE_SHIFT_16G   34
> 
> +bool hugetlb_disabled = false;
> +
>  unsigned int HPAGE_SHIFT;
>  EXPORT_SYMBOL(HPAGE_SHIFT);
> 
> @@ -653,6 +655,11 @@ static int __init hugetlbpage_init(void)
>  {
>   int psize;
> 
> + if (hugetlb_disabled) {
> + pr_info("HugeTLB support is disabled!\n");
> + return 0;
> + }
> +
>  #if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx)
>   if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
>   return -ENODEV;
> 



Re: sparc/ppc/arm compat siginfo ABI regressions: sending SIGFPE via kill() returns wrong values in si_pid and si_uid

2018-04-11 Thread Linus Torvalds
On Wed, Apr 11, 2018 at 6:34 PM, Dmitry V. Levin  wrote:
>
> There is a clear pattern of sneaking in ABI changes using innocently
> looking commit messages.

Yes, this siginfo stuff has been a mess.

Eric - this needs to stop. Or we need to revert all that garbage entirely.

Send a fix. And stop changing the siginfo layout or field values.

 Linus


Re: sparc/ppc/arm compat siginfo ABI regressions: sending SIGFPE via kill() returns wrong values in si_pid and si_uid

2018-04-11 Thread Dmitry V. Levin
Hi,

On Mon, Apr 09, 2018 at 06:22:53PM +0300, Dmitry V. Levin wrote:
> There seems to be a regression in v4.16 on ppc compat very similar
> to sparc compat regression reported earlier at
> https://marc.info/?l=linux-sparc&m=151501500704383 .
> 
> The symptoms are exactly the same: the same signal_receive test from
> the strace test suite fails with the same diagnostics:
> https://build.opensuse.org/public/build/home:ldv_alt/openSUSE_Factory_PowerPC/ppc/strace/_log

The log is big, just look for "KERNEL BUG".

> Unfortunately, I do not have any means to investigate further,
> so just passing this information on to those who care.

OK, the faulty commit is v4.16-rc1~159^2~39
("signal/powerpc: Document conflicts with SI_USER and SIGFPE and SIGTRAP").

One might think that a commit called "Document conflicts" shouldn't
introduce an ABI regression, but this one definitely does by defining
FPE_FIXME and TRAP_FIXME in arch/powerpc/include/uapi/asm/siginfo.h
that affect siginfo_layout().

A similar commit v4.16-rc1~159^2~37
("signal/arm: Document conflicts with SI_USER and SIGFPE") must have
introduced a similar ABI regression to compat arm.

An earlier commit v4.14-rc1~60^2^2~5
("signal/sparc: Document a conflict with SI_USER with SIGFPE") introduced
a similar ABI regression to compat sparc.

There is a clear pattern of sneaking in ABI changes using innocently
looking commit messages.


-- 
ldv


signature.asc
Description: PGP signature


[PATCH v2 07/10] block/swim: Rename macros to avoid inconsistent inverted logic

2018-04-11 Thread Finn Thain
The Sony drive status bits use active-low logic. The swim_readbit()
function converts that to 'C' logic for readability. Hence, the
sense of the names of the status bit macros should not be inverted.

Mostly they are correct. However, the TWOMEG_DRIVE, MFM_MODE and
TWOMEG_MEDIA macros have inverted sense (like MkLinux). Fix this
inconsistency and make the following patches less confusing.

The same problem affects swim3.c so fix that too.

No functional change.

The FDHD drive status bits are documented in sonydriv.cpp from MAME
and in swimiii.h from MkLinux.

Cc: Laurent Vivier 
Cc: Benjamin Herrenschmidt 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Jens Axboe 
Cc: sta...@vger.kernel.org # v4.14+
Tested-by: Stan Johnson 
Signed-off-by: Finn Thain 
Acked-by: Laurent Vivier 
---
 drivers/block/swim.c  | 8 ++++----
 drivers/block/swim3.c | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 7b847170cf71..d1ee4670666a 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -110,7 +110,7 @@ struct iwm {
 /* Select values for swim_select and swim_readbit */
 
 #define READ_DATA_0    0x074
-#define TWOMEG_DRIVE   0x075
+#define ONEMEG_DRIVE   0x075
 #define SINGLE_SIDED   0x076
 #define DRIVE_PRESENT  0x077
 #define DISK_IN        0x170
@@ -118,9 +118,9 @@ struct iwm {
 #define TRACK_ZERO 0x172
 #define TACHO  0x173
 #define READ_DATA_1    0x174
-#define MFM_MODE   0x175
+#define GCR_MODE   0x175
 #define SEEK_COMPLETE  0x176
-#define ONEMEG_MEDIA   0x177
+#define TWOMEG_MEDIA   0x177
 
 /* Bits in handshake register */
 
@@ -612,7 +612,7 @@ static void setup_medium(struct floppy_state *fs)
struct floppy_struct *g;
fs->disk_in = 1;
fs->write_protected = swim_readbit(base, WRITE_PROT);
-   fs->type = swim_readbit(base, ONEMEG_MEDIA);
+   fs->type = swim_readbit(base, TWOMEG_MEDIA);
 
if (swim_track00(base))
printk(KERN_ERR
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index af51015d056e..469541c1e51e 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -148,7 +148,7 @@ struct swim3 {
 #define MOTOR_ON   2
 #define RELAX  3   /* also eject in progress */
 #define READ_DATA_0    4
-#define TWOMEG_DRIVE   5
+#define ONEMEG_DRIVE   5
 #define SINGLE_SIDED   6   /* drive or diskette is 4MB type? */
 #define DRIVE_PRESENT  7
 #define DISK_IN        8
@@ -156,9 +156,9 @@ struct swim3 {
 #define TRACK_ZERO 10
 #define TACHO  11
 #define READ_DATA_1    12
-#define MFM_MODE   13
+#define GCR_MODE   13
 #define SEEK_COMPLETE  14
-#define ONEMEG_MEDIA   15
+#define TWOMEG_MEDIA   15
 
 /* Definitions of values used in writing and formatting */
 #define DATA_ESCAPE0x99
-- 
2.16.1



Re: [GIT PULL] asm-generic fixes for v4.17-rc1

2018-04-11 Thread Linus Torvalds
On Wed, Apr 11, 2018 at 8:40 AM, Arnd Bergmann  wrote:
>
> are available in the git repository at:
>
>   
> git+ssh://gitol...@ra.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git
> tags/asm-generic

Yeah, no they aren't available there at all.

That tag is some old tag from two years ago that just contains your
ancient "asm-generic: use compat version for preadv2 and pwritev2".

Forgot to push out? Or forgot to use "-f" to overwrite the old tag?

 Linus


[GIT PULL] asm-generic fixes for v4.17-rc1

2018-04-11 Thread Arnd Bergmann
The following changes since commit 0adb32858b0bddf4ada5f364a84ed60b196dbcda:

  Linux 4.16 (2018-04-01 14:20:27 -0700)

are available in the git repository at:

  git+ssh://gitol...@ra.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git
tags/asm-generic

for you to fetch changes up to a71e7c44ffb7baea0c0795824afc34cc0bc1a301:

  io: change writeX_relaxed() to remove barriers (2018-04-10 16:37:34 +0200)


asm-generic fixes for v4.17-rc1

I have one regression fix for a minor build problem after the architecture
removal series, plus a rework of the barriers in the readl/writel
functions, thanks to work by Sinan Kaya:

This started from a discussion on the linuxppc and rdma mailing lists
[1]. To summarize, we decided that architectures are responsible to
serialize readl() and writel() accesses on a device MMIO space relative
to DMA performed by that device.

This series provides a pessimistic implementation of that behavior for
asm-generic/io.h, which is in turn used by a number of architectures
(h8300, microblaze, nios2, openrisc, s390, sparc, um, unicore32, and
xtensa). Some of those presumably need no extra barriers, or something
weaker than rmb()/wmb(), and they are advised to override the new default
for better performance.

For inb()/outb(), the same barriers are used, but architectures might
want to add another barrier to outb() here if that can guarantee
non-posted behavior (some architectures can, others cannot do that).

The readl_relaxed()/writel_relaxed() family of functions retains the
existing behavior with no extra barriers.

[1]: https://lists.ozlabs.org/pipermail/linuxppc-dev/2018-March/170481.html


Liu, Changcheng (1):
  dts: remove cris & metag dts hard link file

Sinan Kaya (7):
  io: define several IO & PIO barrier types for the asm-generic version
  io: define stronger ordering for the default readX() implementation
  io: define stronger ordering for the default writeX() implementation
  io: change outX() to have their own IO barrier overrides
  io: change inX() to have their own IO barrier overrides
  io: change readX_relaxed() to remove barriers
  io: change writeX_relaxed() to remove barriers

 include/asm-generic/io.h   | 161
++
 scripts/dtc/include-prefixes/cris  |   1 -
 scripts/dtc/include-prefixes/metag |   1 -
 3 files changed, 143 insertions(+), 20 deletions(-)
 delete mode 120000 scripts/dtc/include-prefixes/cris
 delete mode 120000 scripts/dtc/include-prefixes/metag


Re: powerpc/modules: Fix crashes by adding CONFIG_RELOCATABLE to vermagic

2018-04-11 Thread Ard Biesheuvel
On 11 April 2018 at 16:49, Michael Ellerman
 wrote:
> On Tue, 2018-04-10 at 01:22:06 UTC, Michael Ellerman wrote:
>> If you build the kernel with CONFIG_RELOCATABLE=n, then install the
>> modules, rebuild the kernel with CONFIG_RELOCATABLE=y and leave the
>> old modules installed, we crash something like:
>>
>>   Unable to handle kernel paging request for data at address 
>> 0xd00018d66cef
>>   Faulting instruction address: 0xc21ddd08
>>   Oops: Kernel access of bad area, sig: 11 [#1]
>>   Modules linked in: x_tables autofs4
>>   CPU: 2 PID: 1 Comm: systemd Not tainted 4.16.0-rc6-gcc_ubuntu_le-g99fec39 
>> #1
>>   ...
>>   NIP check_version.isra.22+0x118/0x170
>>   Call Trace:
>> __ksymtab_xt_unregister_table+0x58/0xfcb8 [x_tables] 
>> (unreliable)
>> resolve_symbol+0xb4/0x150
>> load_module+0x10e8/0x29a0
>> SyS_finit_module+0x110/0x140
>> system_call+0x58/0x6c
>>
>> This happens because since commit 71810db27c1c ("modversions: treat
>> symbol CRCs as 32 bit quantities"), a relocatable kernel encodes and
>> handles symbol CRCs differently from a non-relocatable kernel.
>>
>> Although it's possible we could try and detect this situation and
>> handle it, it's much more robust to simply make the state of
>> CONFIG_RELOCATABLE part of the module vermagic.
>>
>> Fixes: 71810db27c1c ("modversions: treat symbol CRCs as 32 bit quantities")
>> Signed-off-by: Michael Ellerman 
>
> Applied to powerpc fixes.
>
> https://git.kernel.org/powerpc/c/73aca179d78eaa11604ba0783a6d8b
>
> cheers

Thanks for the cc. I guess this only affects powerpc, given that it is
the only arch that switches between CRC immediate values and CRC
offsets depending on the configuration.


Re: [3/3] powerpc/powernv: Fix OPAL NVRAM driver OPAL_BUSY loops

2018-04-11 Thread Michael Ellerman
On Tue, 2018-04-10 at 11:49:33 UTC, Nicholas Piggin wrote:
> The OPAL NVRAM driver does not sleep in case it gets OPAL_BUSY or
> OPAL_BUSY_EVENT from firmware, which causes large scheduling
> latencies, and various lockup errors to trigger (again, BMC reboot
> can cause it).
> 
> Fix this by converting it to the standard form OPAL_BUSY loop that
> sleeps.
> 
> Fixes: 628daa8d5abfd ("powerpc/powernv: Add RTC and NVRAM support plus RTAS 
> fallbacks")
> Cc: Benjamin Herrenschmidt 
> Signed-off-by: Nicholas Piggin 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/3b8070335f751aac9f1526ae2e012e

cheers


Re: [1/3] powerpc/powernv: define a standard delay for OPAL_BUSY type retry loops

2018-04-11 Thread Michael Ellerman
On Tue, 2018-04-10 at 11:49:31 UTC, Nicholas Piggin wrote:
> This is the start of an effort to tidy up and standardise all the
> delays. Existing loops have a range of delay/sleep periods from 1ms
> to 20ms, and some have no delay. They all loop forever except rtc,
> which times out after 10 retries, and that uses 10ms delays. So use
> 10ms as our standard delay. The OPAL maintainer agrees 10ms is a
> reasonable starting point.
> 
> The idea is to use the same recipe everywhere, once this is proven to
> work then it will be documented as an OPAL API standard. Then both
> firmware and OS can agree, and if a particular call needs something
> else, then that can be documented with reasoning.
> 
> This is not the end-all of this effort, it's just a relatively easy
> change that fixes some existing high latency delays. There should be
> provision for standardising timeouts and/or interruptible loops where
> possible, so non-fatal firmware errors don't cause hangs.
> 
> Signed-off-by: Nicholas Piggin 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/34dd25de9fe3f60bfdb31b473bf04b

cheers


Re: powerpc/8xx: Build fix with Hugetlbfs enabled

2018-04-11 Thread Michael Ellerman
On Tue, 2018-04-10 at 08:51:26 UTC, "Aneesh Kumar K.V" wrote:
> 8xx uses the slice code when hugetlbfs is enabled. We missed a header include
> on 8xx, which resulted in the build failure below.
> 
> config: mpc885_ads_defconfig + CONFIG_HUGETLBFS
> 
>CC  arch/powerpc/mm/slice.o
> arch/powerpc/mm/slice.c: In function 'slice_get_unmapped_area':
> arch/powerpc/mm/slice.c:655:2: error: implicit declaration of function 
> 'need_extra_context' [-Werror=implicit-function-declaration]
> arch/powerpc/mm/slice.c:656:3: error: implicit declaration of function 
> 'alloc_extended_context' [-Werror=implicit-function-declaration]
> cc1: all warnings being treated as errors
> make[1]: *** [arch/powerpc/mm/slice.o] Error 1
> make: *** [arch/powerpc/mm] Error 2
> 
> on PPC64 the mmu_context.h was included via linux/pkeys.h
> 
> CC: Christophe LEROY 
> Signed-off-by: Aneesh Kumar K.V 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/032900e62c176d75923baf95ad880e

cheers


Re: powerpc/modules: Fix crashes by adding CONFIG_RELOCATABLE to vermagic

2018-04-11 Thread Michael Ellerman
On Tue, 2018-04-10 at 01:22:06 UTC, Michael Ellerman wrote:
> If you build the kernel with CONFIG_RELOCATABLE=n, then install the
> modules, rebuild the kernel with CONFIG_RELOCATABLE=y and leave the
> old modules installed, we crash something like:
> 
>   Unable to handle kernel paging request for data at address 
> 0xd00018d66cef
>   Faulting instruction address: 0xc21ddd08
>   Oops: Kernel access of bad area, sig: 11 [#1]
>   Modules linked in: x_tables autofs4
>   CPU: 2 PID: 1 Comm: systemd Not tainted 4.16.0-rc6-gcc_ubuntu_le-g99fec39 #1
>   ...
>   NIP check_version.isra.22+0x118/0x170
>   Call Trace:
> __ksymtab_xt_unregister_table+0x58/0xfcb8 [x_tables] 
> (unreliable)
> resolve_symbol+0xb4/0x150
> load_module+0x10e8/0x29a0
> SyS_finit_module+0x110/0x140
> system_call+0x58/0x6c
> 
> This happens because since commit 71810db27c1c ("modversions: treat
> symbol CRCs as 32 bit quantities"), a relocatable kernel encodes and
> handles symbol CRCs differently from a non-relocatable kernel.
> 
> Although it's possible we could try and detect this situation and
> handle it, it's much more robust to simply make the state of
> CONFIG_RELOCATABLE part of the module vermagic.
> 
> Fixes: 71810db27c1c ("modversions: treat symbol CRCs as 32 bit quantities")
> Signed-off-by: Michael Ellerman 

Applied to powerpc fixes.

https://git.kernel.org/powerpc/c/73aca179d78eaa11604ba0783a6d8b

cheers


Re: [1/2] KVM: PPC: Book3S HV: trace_tlbie must not be called in realmode

2018-04-11 Thread Michael Ellerman
On Thu, 2018-04-05 at 17:56:30 UTC, Nicholas Piggin wrote:
> This crashes with a "Bad real address for load" attempting to load
> from the vmalloc region in realmode (faulting address is in DAR).
> 
>   Oops: Bad interrupt in KVM entry/exit code, sig: 6 [#1]
>   LE SMP NR_CPUS=2048 NUMA PowerNV
>   CPU: 53 PID: 6582 Comm: qemu-system-ppc Not tainted 
> 4.16.0-01530-g43d1859f0994
>   NIP:  c00155ac LR: c00c2430 CTR: c0015580
>   REGS: c00fff76dd80 TRAP: 0200   Not tainted  
> (4.16.0-01530-g43d1859f0994)
>   MSR:  90201003   CR: 4808  XER: 
>   CFAR: 000102900ef0 DAR: d00017fffd941a28 DSISR: 0040 SOFTE: 3
>   NIP [c00155ac] perf_trace_tlbie+0x2c/0x1a0
>   LR [c00c2430] do_tlbies+0x230/0x2f0
> 
> I suspect the reason is the per-cpu data is not in the linear chunk.
> This could be restored if that was able to be fixed, but for now,
> just remove the tracepoints.
> 
> Fixes: 0428491cba ("powerpc/mm: Trace tlbie(l) instructions")
> Signed-off-by: Nicholas Piggin 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/19ce7909ed11c49f7eddf59e7f49cd

cheers


Re: powerpc/64s: Fix section mismatch warnings from setup_rfi_flush()

2018-04-11 Thread Michael Ellerman
On Thu, 2018-04-05 at 12:49:13 UTC, Michael Ellerman wrote:
> The recent LPM changes to setup_rfi_flush() are causing some section
> mismatch warnings because we removed the __init annotation on
> setup_rfi_flush():
> 
>   The function setup_rfi_flush() references
>   the function __init ppc64_bolted_size().
>   the function __init memblock_alloc_base().
> 
> The references are actually in init_fallback_flush(), but that is
> inlined into setup_rfi_flush().
> 
> These references are safe because:
>  - only pseries calls setup_rfi_flush() at runtime
>  - pseries always passes L1D_FLUSH_FALLBACK at boot
>  - so the fallback flush area will always be allocated
>  - so the check in init_fallback_flush() will always return early:
>/* Only allocate the fallback flush area once (at boot time). */
>if (l1d_flush_fallback_area)
>   return;
> 
>  - and therefore we won't actually call the freed init routines.
> 
> We should rework the code to make it safer by default rather than
> relying on the above, but for now as a quick-fix just add a __ref
> annotation to squash the warning.
> 
> Fixes: abf110f3e1ce ("powerpc/rfi-flush: Make it possible to call 
> setup_rfi_flush() again")
> Signed-off-by: Michael Ellerman 

Applied to powerpc fixes.

https://git.kernel.org/powerpc/c/501a78cbc17c329fabf8e9750a1e9a

cheers


Re: [V2] powerpc/fscr: Enable interrupts earlier before calling get_user()

2018-04-11 Thread Michael Ellerman
On Thu, 2018-03-29 at 06:23:37 UTC, Anshuman Khandual wrote:
> The function get_user() can sleep while trying to fetch instruction
> from user address space and causes the following warning from the
> scheduler.
> 
> BUG: sleeping function called from invalid context
> 
> Although interrupts do get re-enabled, that happens a bit later, after
> get_user() has been called. This change enables interrupts earlier so
> that the get_user() call is covered. While at it, let's check for
> kernel mode and crash, as this interrupt should not have been
> triggered from kernel context.
> 
> Signed-off-by: Anshuman Khandual 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/709b973c844c0b4d115ac3a227a2e5

cheers


Re: [PATCH v2] powerpc/config: powernv_defconfig updates

2018-04-11 Thread Nicholas Piggin
On Wed, 11 Apr 2018 22:24:06 +1000
Balbir Singh  wrote:

> On Wed, Apr 11, 2018 at 8:42 PM, Nicholas Piggin  wrote:
> > On Wed, 11 Apr 2018 20:04:45 +1000
> > Balbir Singh  wrote:
> >  
> >> On Wed, Apr 11, 2018 at 7:12 PM, Nicholas Piggin  
> >> wrote:  
> >> > For consideration:
> >> >
> >> > * Add IPv6 support built in + additional modules - Because it's 2018 
> >> > maan.
> >> > * Add DEFERRED_STRUCT_PAGE_INIT - Let's see what breaks.  
> >>
> >> We did not find any benefits with this on a P8 in terms of boot time
> >> with large memory. May be worth reinvestigating  
> >
> > Worth putting in the defconfig just for testing until then?  
> 
> Absolutely!
> 
> >  
> >>  
> >> > * Add PPC_MEMTRACE - Small powernv debugfs driver for getting hardware 
> >> > traces.
> >> > * Add MEMORY_FAILURE - Machine check exceptions can now drive memory 
> >> > failure.  
> >
> >  
> > Okay for this one?  
> 
> Yep definitely!
> 
> >  
> >> > * Turn on FANOTIFY - This is the current filesystem notification feature.
> >> > * Turn on SCOM_DEBUGFS - Handy for hardware/firmware debugging, security 
> >> > risk?  
> >>
> >> Yep, should not be in defconfig, IMHO  
> >
> > Why not? Honest question, I hear some things about secure
> > boot when I ask about this option but I'm not quite sure why, or
> > what we are securing here.
> >
> > If the firmware does not want us to mess with scoms, it should
> > restrict the call, no?
> >  
> 
> Yes, firmware definitely should. Do we need inband debugging?

I think it's a matter of convenience when testing and debugging
things. OTOH I haven't used it a great deal myself. Others do
want it if we can just ensure firmware will do the right thing
if we're in some secure configuration.

Thanks,
Nick


Re: [PATCH v2] powerpc/config: powernv_defconfig updates

2018-04-11 Thread Balbir Singh
On Wed, Apr 11, 2018 at 8:42 PM, Nicholas Piggin  wrote:
> On Wed, 11 Apr 2018 20:04:45 +1000
> Balbir Singh  wrote:
>
>> On Wed, Apr 11, 2018 at 7:12 PM, Nicholas Piggin  wrote:
>> > For consideration:
>> >
>> > * Add IPv6 support built in + additional modules - Because it's 2018 maan.
>> > * Add DEFERRED_STRUCT_PAGE_INIT - Let's see what breaks.
>>
>> We did not find any benefits with this on a P8 in terms of boot time
>> with large memory. May be worth reinvestigating
>
> Worth putting in the defconfig just for testing until then?

Absolutely!

>
>>
>> > * Add PPC_MEMTRACE - Small powernv debugfs driver for getting hardware 
>> > traces.
>> > * Add MEMORY_FAILURE - Machine check exceptions can now drive memory 
>> > failure.
>
>  
> Okay for this one?

Yep definitely!

>
>> > * Turn on FANOTIFY - This is the current filesystem notification feature.
>> > * Turn on SCOM_DEBUGFS - Handy for hardware/firmware debugging, security 
>> > risk?
>>
>> Yep, should not be in defconfig, IMHO
>
> Why not? Honest question, I hear some things about secure
> boot when I ask about this option but I'm not quite sure why, or
> what we are securing here.
>
> If the firmware does not want us to mess with scoms, it should
> restrict the call, no?
>

Yes, firmware definitely should. Do we need inband debugging?


>
>> > * Turn on async SCSI scanning - Let's see what breaks.
>> >
>> > * Make a bunch of USB hid drivers modules.
>> > * Make SCSI SG, SR, and FC modules - FC is huge.
>> > * Make video drivers (except AST GPU) modules - Also huge.
>> > * Add MLX5 driver as a module - Popular demand.
>> > * Make PCI serial driver a module - Uncommon?
>> >
>> > * Get rid of /dev/port - Not used.
>> > * Remove legacy BSD ttys - Long dead.
>> > * Remove IDE - Deprecated and replaced with ATA.
>> > * Remove WIRELESS - Until we get POWER9 laptops.
>> > * Remove RAW - Long deprecated in favour of direct IO.
>> > * Remove floppy, parport, and PS2 input devices - not supported.
>> > * Remove virtio drivers, ballooning - We're host only.
>>
>> I still think it's good to have them, maybe as modules? Should I
>> switch to powerpc64le_defconfig for a single config with everything --
>> same kernel as guest and bare metal?
>
> Well powernv_defconfig never supports PAPR guest. I think the ppc64
> defconfig does both (and pseries has no bare metal).
>
> Is there a reason to use them in host? And if yes, which ones? We
> could easily make them as modules.

I guess I should use powerpc64le_defconfig then

Balbir


Re: [PATCH 2/2] powerpc/mm/memtrace: Let the arch hotunplug code flush cache

2018-04-11 Thread Balbir Singh
On Wed, Apr 11, 2018 at 9:05 PM, Michael Ellerman  wrote:
> Balbir Singh  writes:
>
>> Don't do this via custom code, instead now that we have support
>> in the arch hotplug/hotunplug code, rely on those routines
>> to do the right thing.
>>
>> Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in 
>> memory tracing")
>
> That's not really right.
>
> This patch doesn't fix it, the previous patch did.
>
> If I just backport this patch then it's still broken.
>
> So I'll tag patch 1 with the above Fixes: tag and add stable, and then
> this just becomes a cleanup.
>

Fair point, the previous one does indeed fix things. I can do a minimal backport
fixing .size to .line_size if needed.

Cheers,
Balbir


Re: [PATCH v3 2/2] mm: remove odd HAVE_PTE_SPECIAL

2018-04-11 Thread Michal Hocko
On Wed 11-04-18 12:32:07, Laurent Dufour wrote:
[...]
> Andrew, should I send a v4, or could you drop the two __maybe_unused
> annotations when applying the patch?

A follow-up $patch-fix would be better than posting this again and
spamming people with more emails.
-- 
Michal Hocko
SUSE Labs


Re: [PATCH 2/2] powerpc/mm/memtrace: Let the arch hotunplug code flush cache

2018-04-11 Thread Michael Ellerman
Balbir Singh  writes:

> Don't do this via custom code, instead now that we have support
> in the arch hotplug/hotunplug code, rely on those routines
> to do the right thing.
>
> Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory 
> tracing")

That's not really right.

This patch doesn't fix it, the previous patch did.

If I just backport this patch then it's still broken.

So I'll tag patch 1 with the above Fixes: tag and add stable, and then
this just becomes a cleanup.

OK?

cheers

> diff --git a/arch/powerpc/platforms/powernv/memtrace.c 
> b/arch/powerpc/platforms/powernv/memtrace.c
> index de470caf0784..fc222a0c2ac4 100644
> --- a/arch/powerpc/platforms/powernv/memtrace.c
> +++ b/arch/powerpc/platforms/powernv/memtrace.c
> @@ -82,19 +82,6 @@ static const struct file_operations memtrace_fops = {
>   .open   = simple_open,
>  };
>  
> -static void flush_memory_region(u64 base, u64 size)
> -{
> - unsigned long line_size = ppc64_caches.l1d.size;
> - u64 end = base + size;
> - u64 addr;
> -
> - base = round_down(base, line_size);
> - end = round_up(end, line_size);
> -
> - for (addr = base; addr < end; addr += line_size)
> - asm volatile("dcbf 0,%0" : "=r" (addr) :: "memory");
> -}
> -
>  static int check_memblock_online(struct memory_block *mem, void *arg)
>  {
>   if (mem->state != MEM_ONLINE)
> @@ -132,10 +119,6 @@ static bool memtrace_offline_pages(u32 nid, u64 
> start_pfn, u64 nr_pages)
>   walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
> change_memblock_state);
>  
> - /* RCU grace period? */
> - flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT),
> - nr_pages << PAGE_SHIFT);
> -
>   lock_device_hotplug();
>   remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
>   unlock_device_hotplug();
> -- 
> 2.13.6


Re: [PATCH v2] powerpc/config: powernv_defconfig updates

2018-04-11 Thread Nicholas Piggin
On Wed, 11 Apr 2018 20:04:45 +1000
Balbir Singh  wrote:

> On Wed, Apr 11, 2018 at 7:12 PM, Nicholas Piggin  wrote:
> > For consideration:
> >
> > * Add IPv6 support built in + additional modules - Because it's 2018 maan.
> > * Add DEFERRED_STRUCT_PAGE_INIT - Let's see what breaks.  
> 
> We did not find any benefits with this on a P8 in terms of boot time
> with large memory. May be worth reinvestigating

Worth putting in the defconfig just for testing until then?

> 
> > * Add PPC_MEMTRACE - Small powernv debugfs driver for getting hardware 
> > traces.
> > * Add MEMORY_FAILURE - Machine check exceptions can now drive memory 
> > failure.

 
Okay for this one?

> > * Turn on FANOTIFY - This is the current filesystem notification feature.
> > * Turn on SCOM_DEBUGFS - Handy for hardware/firmware debugging, security 
> > risk?  
> 
> Yep, should not be in defconfig, IMHO

Why not? Honest question, I hear some things about secure
boot when I ask about this option but I'm not quite sure why, or
what we are securing here.

If the firmware does not want us to mess with scoms, it should
restrict the call, no?


> > * Turn on async SCSI scanning - Let's see what breaks.
> >
> > * Make a bunch of USB hid drivers modules.
> > * Make SCSI SG, SR, and FC modules - FC is huge.
> > * Make video drivers (except AST GPU) modules - Also huge.
> > * Add MLX5 driver as a module - Popular demand.
> > * Make PCI serial driver a module - Uncommon?
> >
> > * Get rid of /dev/port - Not used.
> > * Remove legacy BSD ttys - Long dead.
> > * Remove IDE - Deprecated and replaced with ATA.
> > * Remove WIRELESS - Until we get POWER9 laptops.
> > * Remove RAW - Long deprecated in favour of direct IO.
> > * Remove floppy, parport, and PS2 input devices - not supported.
> > * Remove virtio drivers, ballooning - We're host only.  
> 
> I still think its good to have them, may be as modules? Should I
> switch to powerpc64le_defconfig for a single config with everything --
> same kernel as guest and bare metal?

Well powernv_defconfig never supports PAPR guest. I think the ppc64
defconfig does both (and pseries has no bare metal).

Is there a reason to use them in host? And if yes, which ones? We
could easily make them as modules.

Thanks,
Nick


Re: [PATCH v3 2/2] mm: remove odd HAVE_PTE_SPECIAL

2018-04-11 Thread Laurent Dufour
On 11/04/2018 11:09, Christophe LEROY wrote:
> 
> 
> Le 11/04/2018 à 11:03, Laurent Dufour a écrit :
>>
>>
>> On 11/04/2018 10:58, Christophe LEROY wrote:
>>>
>>>
>>> Le 11/04/2018 à 10:03, Laurent Dufour a écrit :
 Remove the additional define HAVE_PTE_SPECIAL and rely directly on
 CONFIG_ARCH_HAS_PTE_SPECIAL.

 There is no functional change introduced by this patch

 Signed-off-by: Laurent Dufour 
 ---
    mm/memory.c | 19 ---
    1 file changed, 8 insertions(+), 11 deletions(-)

 diff --git a/mm/memory.c b/mm/memory.c
 index 96910c625daa..7f7dc7b2a341 100644
 --- a/mm/memory.c
 +++ b/mm/memory.c
 @@ -817,17 +817,12 @@ static void print_bad_pte(struct vm_area_struct *vma,
 unsigned long addr,
     * PFNMAP mappings in order to support COWable mappings.
     *
     */
 -#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
 -# define HAVE_PTE_SPECIAL 1
 -#else
 -# define HAVE_PTE_SPECIAL 0
 -#endif
    struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long
 addr,
     pte_t pte, bool with_public_device)
    {
    unsigned long pfn = pte_pfn(pte);
    -    if (HAVE_PTE_SPECIAL) {
 +    if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
    if (likely(!pte_special(pte)))
    goto check_pfn;
    if (vma->vm_ops && vma->vm_ops->find_special_page)
 @@ -862,7 +857,7 @@ struct page *_vm_normal_page(struct vm_area_struct 
 *vma,
 unsigned long addr,
    return NULL;
    }
    -    /* !HAVE_PTE_SPECIAL case follows: */
 +    /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */
      if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
    if (vma->vm_flags & VM_MIXEDMAP) {
 @@ -881,7 +876,8 @@ struct page *_vm_normal_page(struct vm_area_struct 
 *vma,
 unsigned long addr,
      if (is_zero_pfn(pfn))
    return NULL;
 -check_pfn:
 +
 +check_pfn: __maybe_unused
>>>
>>> See below
>>>
    if (unlikely(pfn > highest_memmap_pfn)) {
    print_bad_pte(vma, addr, pte, NULL);
    return NULL;
 @@ -891,7 +887,7 @@ struct page *_vm_normal_page(struct vm_area_struct 
 *vma,
 unsigned long addr,
     * NOTE! We still have PageReserved() pages in the page tables.
     * eg. VDSO mappings can cause them to exist.
     */
 -out:
 +out: __maybe_unused
>>>
>>> Why do you need that change ?
>>>
>>> There is no reason for the compiler to complain. It would complain if the 
>>> goto
>>> was within a #ifdef, but all the purpose of using IS_ENABLED() is to allow 
>>> the
>>> compiler to properly handle all possible cases. That's all the force of
>>> IS_ENABLED() compared to ifdefs, and that the reason why they are 
>>> plebicited,
>>> ref Linux Codying style for a detailed explanation.
>>
>> Fair enough.
>>
>> Should I submit a v4 just to remove these so ugly __maybe_unused ?
>>
> 
> Most likely, unless the mm maintainer agrees to remove them by himself when
> applying your patch ?

That was my point.

Andrew, should I send a v4, or could you remove the two __maybe_unused when
applying the patch?

Thanks,
Laurent.



Re: [PATCH v2] powerpc/config: powernv_defconfig updates

2018-04-11 Thread Balbir Singh
On Wed, Apr 11, 2018 at 7:12 PM, Nicholas Piggin  wrote:
> For consideration:
>
> * Add IPv6 support built in + additional modules - Because it's 2018 maan.
> * Add DEFERRED_STRUCT_PAGE_INIT - Let's see what breaks.

We did not find any benefits with this on a P8 in terms of boot time
with large memory. May be worth reinvestigating

> * Add PPC_MEMTRACE - Small powernv debugfs driver for getting hardware traces.
> * Add MEMORY_FAILURE - Machine check exceptions can now drive memory failure.
> * Turn on FANOTIFY - This is the current filesystem notification feature.
> * Turn on SCOM_DEBUGFS - Handy for hardware/firmware debugging, security risk?

Yep, should not be in defconfig, IMHO

> * Turn on async SCSI scanning - Let's see what breaks.
>
> * Make a bunch of USB hid drivers modules.
> * Make SCSI SG, SR, and FC modules - FC is huge.
> * Make video drivers (except AST GPU) modules - Also huge.
> * Add MLX5 driver as a module - Popular demand.
> * Make PCI serial driver a module - Uncommon?
>
> * Get rid of /dev/port - Not used.
> * Remove legacy BSD ttys - Long dead.
> * Remove IDE - Deprecated and replaced with ATA.
> * Remove WIRELESS - Until we get POWER9 laptops.
> * Remove RAW - Long deprecated in favour of direct IO.
> * Remove floppy, parport, and PS2 input devices - not supported.
> * Remove virtio drivers, ballooning - We're host only.

I still think it's good to have them, maybe as modules? Should I
switch to powerpc64le_defconfig for a single config with everything --
same kernel as guest and bare metal?

> * Remove PPP - Sorry Paulus.
> * Make more things modules, NFS FS, RAM disk, netconsole, MS-DOS fs.
>
> This does not disable FTRACE options, that's just an optimisation the
> defconfig target made. make powernv_defconfig adds those to .config.
>
> This results in a significantly smaller vmlinux:
>
>     text    data     bss      dec     hex filename
> 13121779 5284224 1383776 19789779 12df7d3 vmlinux
> 12126273 4771930 1341464 18239667 11650b3 vmlinux
>
> Signed-off-by: Nicholas Piggin 
> ---

Balbir Singh.


[PATCH v2] powerpc/config: powernv_defconfig updates

2018-04-11 Thread Nicholas Piggin
For consideration:

* Add IPv6 support built in + additional modules - Because it's 2018 maan.
* Add DEFERRED_STRUCT_PAGE_INIT - Let's see what breaks.
* Add PPC_MEMTRACE - Small powernv debugfs driver for getting hardware traces.
* Add MEMORY_FAILURE - Machine check exceptions can now drive memory failure.
* Turn on FANOTIFY - This is the current filesystem notification feature.
* Turn on SCOM_DEBUGFS - Handy for hardware/firmware debugging, security risk?
* Turn on async SCSI scanning - Let's see what breaks.

* Make a bunch of USB hid drivers modules.
* Make SCSI SG, SR, and FC modules - FC is huge.
* Make video drivers (except AST GPU) modules - Also huge.
* Add MLX5 driver as a module - Popular demand.
* Make PCI serial driver a module - Uncommon?

* Get rid of /dev/port - Not used.
* Remove legacy BSD ttys - Long dead.
* Remove IDE - Deprecated and replaced with ATA.
* Remove WIRELESS - Until we get POWER9 laptops.
* Remove RAW - Long deprecated in favour of direct IO.
* Remove floppy, parport, and PS2 input devices - not supported.
* Remove virtio drivers, ballooning - We're host only.
* Remove PPP - Sorry Paulus.
* Make more things modules, NFS FS, RAM disk, netconsole, MS-DOS fs.

This does not disable FTRACE options, that's just an optimisation the
defconfig target made. make powernv_defconfig adds those to .config.

This results in a significantly smaller vmlinux:

    text    data     bss      dec     hex filename
13121779 5284224 1383776 19789779 12df7d3 vmlinux
12126273 4771930 1341464 18239667 11650b3 vmlinux

Signed-off-by: Nicholas Piggin 
---
Changes since v1:
- Imagine the "I have no idea what I'm doing" scientist dog meme.
 
 arch/powerpc/configs/powernv_defconfig | 105 +++--
 1 file changed, 62 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/configs/powernv_defconfig 
b/arch/powerpc/configs/powernv_defconfig
index 9e92aa6a52ba..371169362683 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -38,7 +38,9 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_PARTITION_ADVANCED=y
+CONFIG_SCOM_DEBUGFS=y
 CONFIG_OPAL_PRD=y
+CONFIG_PPC_MEMTRACE=y
 # CONFIG_PPC_PSERIES is not set
 # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
@@ -54,7 +56,10 @@ CONFIG_NUMA=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_HWPOISON_INJECT=m
 CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_PPC_64K_PAGES=y
 CONFIG_PPC_SUBPAGE_PROT=y
 CONFIG_SCHED_SMT=y
@@ -72,7 +77,13 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-# CONFIG_IPV6 is not set
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_IPV6_SIT=m
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_ADVANCED is not set
 CONFIG_BRIDGE=m
@@ -81,33 +92,28 @@ CONFIG_NET_SCHED=y
 CONFIG_NET_CLS_BPF=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_BPF=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BPF_JIT=y
+# CONFIG_WIRELESS is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_POWERNV_FLASH=y
-CONFIG_PARPORT=m
-CONFIG_PARPORT_PC=m
-CONFIG_BLK_DEV_FD=m
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM=m
 CONFIG_BLK_DEV_RAM_SIZE=65536
-CONFIG_VIRTIO_BLK=m
 CONFIG_BLK_DEV_NVME=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AMD74XX=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=m
-CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR=m
 CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SG=m
 CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_SRP_ATTRS=y
 CONFIG_SCSI_CXGB3_ISCSI=m
 CONFIG_SCSI_CXGB4_ISCSI=m
@@ -121,7 +127,6 @@ CONFIG_SCSI_IPR=y
 CONFIG_SCSI_QLA_FC=m
 CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_LPFC=m
-CONFIG_SCSI_VIRTIO=m
 CONFIG_SCSI_DH=y
 CONFIG_SCSI_DH_RDAC=m
 CONFIG_SCSI_DH_ALUA=m
@@ -152,16 +157,16 @@ CONFIG_DUMMY=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_VXLAN=m
-CONFIG_NETCONSOLE=y
+CONFIG_NETCONSOLE=m
 CONFIG_TUN=m
 CONFIG_VETH=m
-CONFIG_VIRTIO_NET=m
 CONFIG_VORTEX=m
 CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
 CONFIG_PCNET32=m
 CONFIG_TIGON3=y
 CONFIG_BNX2X=m
+# CONFIG_CAVIUM_PTP is not set
 CONFIG_CHELSIO_T1=m
 CONFIG_BE2NET=m
 CONFIG_S2IO=m
@@ -172,46 +177,62 @@ CONFIG_IXGB=m
 CONFIG_IXGBE=m
 CONFIG_I40E=m
 CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_FPGA=y
+CONFIG_MLX5_CORE_EN=y
+CONFIG_MLX5_CORE_IPOIB=y
 CONFIG_MYRI10GE=m
 CONFIG_QLGE=m
 CONFIG_NETXEN_NIC=m
-CONFIG_PPP=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPPOE=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
+CONFIG_USB_NET_DRIVERS=m
+# CONFIG_WLAN is not set
 

Re: [PATCH v3 2/2] mm: remove odd HAVE_PTE_SPECIAL

2018-04-11 Thread Christophe LEROY



Le 11/04/2018 à 10:41, Laurent Dufour a écrit :

On 11/04/2018 10:33, Michal Hocko wrote:

On Wed 11-04-18 10:03:36, Laurent Dufour wrote:

@@ -881,7 +876,8 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
unsigned long addr,
  
  	if (is_zero_pfn(pfn))

return NULL;
-check_pfn:
+
+check_pfn: __maybe_unused
if (unlikely(pfn > highest_memmap_pfn)) {
print_bad_pte(vma, addr, pte, NULL);
return NULL;
@@ -891,7 +887,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
unsigned long addr,
 * NOTE! We still have PageReserved() pages in the page tables.
 * eg. VDSO mappings can cause them to exist.
 */
-out:
+out: __maybe_unused
return pfn_to_page(pfn);


Why do we need this ugliness all of the sudden?

Indeed the compiler doesn't complaint but in theory it should since these
labels are not used depending on CONFIG_ARCH_HAS_PTE_SPECIAL.


Why should it complain ?

Regards
Christophe





Re: [PATCH v3 2/2] mm: remove odd HAVE_PTE_SPECIAL

2018-04-11 Thread Christophe LEROY



Le 11/04/2018 à 11:03, Laurent Dufour a écrit :



On 11/04/2018 10:58, Christophe LEROY wrote:



Le 11/04/2018 à 10:03, Laurent Dufour a écrit :

Remove the additional define HAVE_PTE_SPECIAL and rely directly on
CONFIG_ARCH_HAS_PTE_SPECIAL.

There is no functional change introduced by this patch

Signed-off-by: Laurent Dufour 
---
   mm/memory.c | 19 ---
   1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 96910c625daa..7f7dc7b2a341 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -817,17 +817,12 @@ static void print_bad_pte(struct vm_area_struct *vma,
unsigned long addr,
    * PFNMAP mappings in order to support COWable mappings.
    *
    */
-#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
-# define HAVE_PTE_SPECIAL 1
-#else
-# define HAVE_PTE_SPECIAL 0
-#endif
   struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
    pte_t pte, bool with_public_device)
   {
   unsigned long pfn = pte_pfn(pte);
   -    if (HAVE_PTE_SPECIAL) {
+    if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
   if (likely(!pte_special(pte)))
   goto check_pfn;
   if (vma->vm_ops && vma->vm_ops->find_special_page)
@@ -862,7 +857,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma,
unsigned long addr,
   return NULL;
   }
   -    /* !HAVE_PTE_SPECIAL case follows: */
+    /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */
     if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
   if (vma->vm_flags & VM_MIXEDMAP) {
@@ -881,7 +876,8 @@ struct page *_vm_normal_page(struct vm_area_struct *vma,
unsigned long addr,
     if (is_zero_pfn(pfn))
   return NULL;
-check_pfn:
+
+check_pfn: __maybe_unused


See below


   if (unlikely(pfn > highest_memmap_pfn)) {
   print_bad_pte(vma, addr, pte, NULL);
   return NULL;
@@ -891,7 +887,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma,
unsigned long addr,
    * NOTE! We still have PageReserved() pages in the page tables.
    * eg. VDSO mappings can cause them to exist.
    */
-out:
+out: __maybe_unused


Why do you need that change ?

There is no reason for the compiler to complain. It would complain if the goto
was within a #ifdef, but all the purpose of using IS_ENABLED() is to allow the
compiler to properly handle all possible cases. That's all the force of
IS_ENABLED() compared to ifdefs, and that the reason why they are plebicited,
ref Linux Codying style for a detailed explanation.


Fair enough.

Should I submit a v4 just to remove these so ugly __maybe_unused ?



Most likely, unless the mm maintainer agrees to remove them by himself 
when applying your patch ?


Christophe


Re: [PATCH v3 2/2] mm: remove odd HAVE_PTE_SPECIAL

2018-04-11 Thread Laurent Dufour


On 11/04/2018 10:58, Christophe LEROY wrote:
> 
> 
> Le 11/04/2018 à 10:03, Laurent Dufour a écrit :
>> Remove the additional define HAVE_PTE_SPECIAL and rely directly on
>> CONFIG_ARCH_HAS_PTE_SPECIAL.
>>
>> There is no functional change introduced by this patch
>>
>> Signed-off-by: Laurent Dufour 
>> ---
>>   mm/memory.c | 19 ---
>>   1 file changed, 8 insertions(+), 11 deletions(-)
>>
>> diff --git a/mm/memory.c b/mm/memory.c
>> index 96910c625daa..7f7dc7b2a341 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -817,17 +817,12 @@ static void print_bad_pte(struct vm_area_struct *vma,
>> unsigned long addr,
>>    * PFNMAP mappings in order to support COWable mappings.
>>    *
>>    */
>> -#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
>> -# define HAVE_PTE_SPECIAL 1
>> -#else
>> -# define HAVE_PTE_SPECIAL 0
>> -#endif
>>   struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long 
>> addr,
>>    pte_t pte, bool with_public_device)
>>   {
>>   unsigned long pfn = pte_pfn(pte);
>>   -    if (HAVE_PTE_SPECIAL) {
>> +    if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
>>   if (likely(!pte_special(pte)))
>>   goto check_pfn;
>>   if (vma->vm_ops && vma->vm_ops->find_special_page)
>> @@ -862,7 +857,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma,
>> unsigned long addr,
>>   return NULL;
>>   }
>>   -    /* !HAVE_PTE_SPECIAL case follows: */
>> +    /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */
>>     if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
>>   if (vma->vm_flags & VM_MIXEDMAP) {
>> @@ -881,7 +876,8 @@ struct page *_vm_normal_page(struct vm_area_struct *vma,
>> unsigned long addr,
>>     if (is_zero_pfn(pfn))
>>   return NULL;
>> -check_pfn:
>> +
>> +check_pfn: __maybe_unused
> 
> See below
> 
>>   if (unlikely(pfn > highest_memmap_pfn)) {
>>   print_bad_pte(vma, addr, pte, NULL);
>>   return NULL;
>> @@ -891,7 +887,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma,
>> unsigned long addr,
>>    * NOTE! We still have PageReserved() pages in the page tables.
>>    * eg. VDSO mappings can cause them to exist.
>>    */
>> -out:
>> +out: __maybe_unused
> 
> Why do you need that change ?
> 
> There is no reason for the compiler to complain. It would complain if the goto
> was within a #ifdef, but all the purpose of using IS_ENABLED() is to allow the
> compiler to properly handle all possible cases. That's all the force of
> IS_ENABLED() compared to ifdefs, and that the reason why they are plebicited,
> ref Linux Codying style for a detailed explanation.

Fair enough.

Should I submit a v4 just to remove these so ugly __maybe_unused ?



Re: [PATCH v3 2/2] mm: remove odd HAVE_PTE_SPECIAL

2018-04-11 Thread Christophe LEROY



Le 11/04/2018 à 10:03, Laurent Dufour a écrit :

Remove the additional define HAVE_PTE_SPECIAL and rely directly on
CONFIG_ARCH_HAS_PTE_SPECIAL.

There is no functional change introduced by this patch

Signed-off-by: Laurent Dufour 
---
  mm/memory.c | 19 ---
  1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 96910c625daa..7f7dc7b2a341 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -817,17 +817,12 @@ static void print_bad_pte(struct vm_area_struct *vma, 
unsigned long addr,
   * PFNMAP mappings in order to support COWable mappings.
   *
   */
-#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
-# define HAVE_PTE_SPECIAL 1
-#else
-# define HAVE_PTE_SPECIAL 0
-#endif
  struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 pte_t pte, bool with_public_device)
  {
unsigned long pfn = pte_pfn(pte);
  
-	if (HAVE_PTE_SPECIAL) {

+   if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
if (likely(!pte_special(pte)))
goto check_pfn;
if (vma->vm_ops && vma->vm_ops->find_special_page)
@@ -862,7 +857,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
unsigned long addr,
return NULL;
}
  
-	/* !HAVE_PTE_SPECIAL case follows: */

+   /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */
  
  	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {

if (vma->vm_flags & VM_MIXEDMAP) {
@@ -881,7 +876,8 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
unsigned long addr,
  
  	if (is_zero_pfn(pfn))

return NULL;
-check_pfn:
+
+check_pfn: __maybe_unused


See below


if (unlikely(pfn > highest_memmap_pfn)) {
print_bad_pte(vma, addr, pte, NULL);
return NULL;
@@ -891,7 +887,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
unsigned long addr,
 * NOTE! We still have PageReserved() pages in the page tables.
 * eg. VDSO mappings can cause them to exist.
 */
-out:
+out: __maybe_unused


Why do you need that change ?

There is no reason for the compiler to complain. It would complain if
the goto was within a #ifdef, but the whole purpose of using IS_ENABLED()
is to allow the compiler to properly handle all possible cases. That's
the whole strength of IS_ENABLED() compared to #ifdefs, and that is the
reason why it is preferred; see the Linux coding style for a detailed explanation.


Christophe



return pfn_to_page(pfn);
  }
  
@@ -904,7 +900,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,

/*
 * There is no pmd_special() but there may be special pmds, e.g.
 * in a direct-access (dax) mapping, so let's just replicate the
-* !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+* !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here.
 */
if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
if (vma->vm_flags & VM_MIXEDMAP) {
@@ -1933,7 +1929,8 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, 
unsigned long addr,
 * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
 * without pte special, it would there be refcounted as a normal page.
 */
-   if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
+   if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) &&
+   !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
struct page *page;
  
  		/*




Re: [PATCH v3 2/2] mm: remove odd HAVE_PTE_SPECIAL

2018-04-11 Thread Michal Hocko
On Wed 11-04-18 10:41:23, Laurent Dufour wrote:
> On 11/04/2018 10:33, Michal Hocko wrote:
> > On Wed 11-04-18 10:03:36, Laurent Dufour wrote:
> >> @@ -881,7 +876,8 @@ struct page *_vm_normal_page(struct vm_area_struct 
> >> *vma, unsigned long addr,
> >>  
> >>if (is_zero_pfn(pfn))
> >>return NULL;
> >> -check_pfn:
> >> +
> >> +check_pfn: __maybe_unused
> >>if (unlikely(pfn > highest_memmap_pfn)) {
> >>print_bad_pte(vma, addr, pte, NULL);
> >>return NULL;
> >> @@ -891,7 +887,7 @@ struct page *_vm_normal_page(struct vm_area_struct 
> >> *vma, unsigned long addr,
> >> * NOTE! We still have PageReserved() pages in the page tables.
> >> * eg. VDSO mappings can cause them to exist.
> >> */
> >> -out:
> >> +out: __maybe_unused
> >>return pfn_to_page(pfn);
> > 
> > Why do we need this ugliness all of the sudden?
> Indeed the compiler doesn't complaint but in theory it should since these
> labels are not used depending on CONFIG_ARCH_HAS_PTE_SPECIAL.

Well, such a warning would be quite pointless so I would rather not make
the code ugly. The value of unused label is quite questionable to start
with...

-- 
Michal Hocko
SUSE Labs


Re: [PATCH v3 2/2] mm: remove odd HAVE_PTE_SPECIAL

2018-04-11 Thread Laurent Dufour
On 11/04/2018 10:33, Michal Hocko wrote:
> On Wed 11-04-18 10:03:36, Laurent Dufour wrote:
>> @@ -881,7 +876,8 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
>> unsigned long addr,
>>  
>>  if (is_zero_pfn(pfn))
>>  return NULL;
>> -check_pfn:
>> +
>> +check_pfn: __maybe_unused
>>  if (unlikely(pfn > highest_memmap_pfn)) {
>>  print_bad_pte(vma, addr, pte, NULL);
>>  return NULL;
>> @@ -891,7 +887,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
>> unsigned long addr,
>>   * NOTE! We still have PageReserved() pages in the page tables.
>>   * eg. VDSO mappings can cause them to exist.
>>   */
>> -out:
>> +out: __maybe_unused
>>  return pfn_to_page(pfn);
> 
> Why do we need this ugliness all of the sudden?
Indeed the compiler doesn't complain, but in theory it should, since these
labels may be unused depending on CONFIG_ARCH_HAS_PTE_SPECIAL.



Re: [PATCH v3 1/2] mm: introduce ARCH_HAS_PTE_SPECIAL

2018-04-11 Thread Michal Hocko
On Wed 11-04-18 10:03:35, Laurent Dufour wrote:
> Currently the PTE special supports is turned on in per architecture header
> files. Most of the time, it is defined in arch/*/include/asm/pgtable.h
> depending or not on some other per architecture static definition.
> 
> This patch introduce a new configuration variable to manage this directly
> in the Kconfig files. It would later replace __HAVE_ARCH_PTE_SPECIAL.
> 
> Here notes for some architecture where the definition of
> __HAVE_ARCH_PTE_SPECIAL is not obvious:
> 
> arm
>  __HAVE_ARCH_PTE_SPECIAL which is currently defined in
> arch/arm/include/asm/pgtable-3level.h which is included by
> arch/arm/include/asm/pgtable.h when CONFIG_ARM_LPAE is set.
> So select ARCH_HAS_PTE_SPECIAL if ARM_LPAE.
> 
> powerpc
> __HAVE_ARCH_PTE_SPECIAL is defined in 2 files:
>  - arch/powerpc/include/asm/book3s/64/pgtable.h
>  - arch/powerpc/include/asm/pte-common.h
> The first one is included if (PPC_BOOK3S & PPC64) while the second is
> included in all the other cases.
> So select ARCH_HAS_PTE_SPECIAL all the time.
> 
> sparc:
> __HAVE_ARCH_PTE_SPECIAL is defined if defined(__sparc__) &&
> defined(__arch64__) which are defined through the compiler in
> sparc/Makefile if !SPARC32 which I assume to be if SPARC64.
> So select ARCH_HAS_PTE_SPECIAL if SPARC64
> 
> There is no functional change introduced by this patch.
> 
> Suggested-by: Jerome Glisse 
> Reviewed-by: Jerome Glisse 
> Acked-by: David Rientjes 
> Signed-off-by: Laurent Dufour 

Looks good to me. I have checked x86 and the generic code and it looks
good to me. Anyway arch maintainers really have to double check this.
-- 
Michal Hocko
SUSE Labs


Re: [PATCH v3 2/2] mm: remove odd HAVE_PTE_SPECIAL

2018-04-11 Thread Michal Hocko
On Wed 11-04-18 10:03:36, Laurent Dufour wrote:
> @@ -881,7 +876,8 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
> unsigned long addr,
>  
>   if (is_zero_pfn(pfn))
>   return NULL;
> -check_pfn:
> +
> +check_pfn: __maybe_unused
>   if (unlikely(pfn > highest_memmap_pfn)) {
>   print_bad_pte(vma, addr, pte, NULL);
>   return NULL;
> @@ -891,7 +887,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
> unsigned long addr,
>* NOTE! We still have PageReserved() pages in the page tables.
>* eg. VDSO mappings can cause them to exist.
>*/
> -out:
> +out: __maybe_unused
>   return pfn_to_page(pfn);

Why do we need this ugliness all of the sudden?

-- 
Michal Hocko
SUSE Labs


[PATCH v3 1/2] mm: introduce ARCH_HAS_PTE_SPECIAL

2018-04-11 Thread Laurent Dufour
Currently, PTE special support is turned on in per-architecture header
files. Most of the time, it is defined in arch/*/include/asm/pgtable.h,
depending or not on some other per-architecture static definition.

This patch introduces a new configuration variable to manage this directly
in the Kconfig files. It would later replace __HAVE_ARCH_PTE_SPECIAL.

Here are notes for some architectures where the definition of
__HAVE_ARCH_PTE_SPECIAL is not obvious:

arm
 __HAVE_ARCH_PTE_SPECIAL which is currently defined in
arch/arm/include/asm/pgtable-3level.h which is included by
arch/arm/include/asm/pgtable.h when CONFIG_ARM_LPAE is set.
So select ARCH_HAS_PTE_SPECIAL if ARM_LPAE.

powerpc
__HAVE_ARCH_PTE_SPECIAL is defined in 2 files:
 - arch/powerpc/include/asm/book3s/64/pgtable.h
 - arch/powerpc/include/asm/pte-common.h
The first one is included if (PPC_BOOK3S & PPC64) while the second is
included in all the other cases.
So select ARCH_HAS_PTE_SPECIAL all the time.

sparc:
__HAVE_ARCH_PTE_SPECIAL is defined if defined(__sparc__) &&
defined(__arch64__) which are defined through the compiler in
sparc/Makefile if !SPARC32 which I assume to be if SPARC64.
So select ARCH_HAS_PTE_SPECIAL if SPARC64

There is no functional change introduced by this patch.

Suggested-by: Jerome Glisse 
Reviewed-by: Jerome Glisse 
Acked-by: David Rientjes 
Signed-off-by: Laurent Dufour 
---
 Documentation/features/vm/pte_special/arch-support.txt | 2 +-
 arch/arc/Kconfig   | 1 +
 arch/arc/include/asm/pgtable.h | 2 --
 arch/arm/Kconfig   | 1 +
 arch/arm/include/asm/pgtable-3level.h  | 1 -
 arch/arm64/Kconfig | 1 +
 arch/arm64/include/asm/pgtable.h   | 2 --
 arch/powerpc/Kconfig   | 1 +
 arch/powerpc/include/asm/book3s/64/pgtable.h   | 3 ---
 arch/powerpc/include/asm/pte-common.h  | 3 ---
 arch/riscv/Kconfig | 1 +
 arch/riscv/include/asm/pgtable-bits.h  | 3 ---
 arch/s390/Kconfig  | 1 +
 arch/s390/include/asm/pgtable.h| 1 -
 arch/sh/Kconfig| 1 +
 arch/sh/include/asm/pgtable.h  | 2 --
 arch/sparc/Kconfig | 1 +
 arch/sparc/include/asm/pgtable_64.h| 3 ---
 arch/x86/Kconfig   | 1 +
 arch/x86/include/asm/pgtable_types.h   | 1 -
 include/linux/pfn_t.h  | 4 ++--
 mm/Kconfig | 3 +++
 mm/gup.c   | 4 ++--
 mm/memory.c| 2 +-
 24 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/Documentation/features/vm/pte_special/arch-support.txt 
b/Documentation/features/vm/pte_special/arch-support.txt
index 055004f467d2..cd05924ea875 100644
--- a/Documentation/features/vm/pte_special/arch-support.txt
+++ b/Documentation/features/vm/pte_special/arch-support.txt
@@ -1,6 +1,6 @@
 #
 # Feature name:  pte_special
-# Kconfig:   __HAVE_ARCH_PTE_SPECIAL
+# Kconfig:   ARCH_HAS_PTE_SPECIAL
 # description:   arch supports the pte_special()/pte_mkspecial() VM 
APIs
 #
 ---
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index d76bf4a83740..8516e2b0239a 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -44,6 +44,7 @@ config ARC
select HAVE_GENERIC_DMA_COHERENT
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZMA
+   select ARCH_HAS_PTE_SPECIAL
 
 config MIGHT_HAVE_PCI
bool
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 08fe33830d4b..8ec5599a0957 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -320,8 +320,6 @@ PTE_BIT_FUNC(mkexec,|= (_PAGE_EXECUTE));
 PTE_BIT_FUNC(mkspecial,|= (_PAGE_SPECIAL));
 PTE_BIT_FUNC(mkhuge,   |= (_PAGE_HW_SZ));
 
-#define __HAVE_ARCH_PTE_SPECIAL
-
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a7f8e7f4b88f..c088c851b235 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -8,6 +8,7 @@ config ARM
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
+   select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
diff --git a/arch/arm/include/asm/pgtable-3level.h 

[PATCH v3 2/2] mm: remove odd HAVE_PTE_SPECIAL

2018-04-11 Thread Laurent Dufour
Remove the additional define HAVE_PTE_SPECIAL and rely directly on
CONFIG_ARCH_HAS_PTE_SPECIAL.

There is no functional change introduced by this patch

Signed-off-by: Laurent Dufour 
---
 mm/memory.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 96910c625daa..7f7dc7b2a341 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -817,17 +817,12 @@ static void print_bad_pte(struct vm_area_struct *vma, 
unsigned long addr,
  * PFNMAP mappings in order to support COWable mappings.
  *
  */
-#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
-# define HAVE_PTE_SPECIAL 1
-#else
-# define HAVE_PTE_SPECIAL 0
-#endif
 struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 pte_t pte, bool with_public_device)
 {
unsigned long pfn = pte_pfn(pte);
 
-   if (HAVE_PTE_SPECIAL) {
+   if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
if (likely(!pte_special(pte)))
goto check_pfn;
if (vma->vm_ops && vma->vm_ops->find_special_page)
@@ -862,7 +857,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
unsigned long addr,
return NULL;
}
 
-   /* !HAVE_PTE_SPECIAL case follows: */
+   /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */
 
if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
if (vma->vm_flags & VM_MIXEDMAP) {
@@ -881,7 +876,8 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
unsigned long addr,
 
if (is_zero_pfn(pfn))
return NULL;
-check_pfn:
+
+check_pfn: __maybe_unused
if (unlikely(pfn > highest_memmap_pfn)) {
print_bad_pte(vma, addr, pte, NULL);
return NULL;
@@ -891,7 +887,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, 
unsigned long addr,
 * NOTE! We still have PageReserved() pages in the page tables.
 * eg. VDSO mappings can cause them to exist.
 */
-out:
+out: __maybe_unused
return pfn_to_page(pfn);
 }
 
@@ -904,7 +900,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, 
unsigned long addr,
/*
 * There is no pmd_special() but there may be special pmds, e.g.
 * in a direct-access (dax) mapping, so let's just replicate the
-* !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+* !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here.
 */
if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
if (vma->vm_flags & VM_MIXEDMAP) {
@@ -1933,7 +1929,8 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, 
unsigned long addr,
 * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
 * without pte special, it would there be refcounted as a normal page.
 */
-   if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
+   if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) &&
+   !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
struct page *page;
 
/*
-- 
2.7.4



[PATCH v3 0/2] move __HAVE_ARCH_PTE_SPECIAL in Kconfig

2018-04-11 Thread Laurent Dufour
The per-architecture __HAVE_ARCH_PTE_SPECIAL is defined statically in the
per-architecture header files. This does not allow other configuration
options to depend on it.

The first patch of this series replaces __HAVE_ARCH_PTE_SPECIAL with
CONFIG_ARCH_HAS_PTE_SPECIAL, defined in the Kconfig files, and selects it
automatically for the architectures that were already defining it in their
header files.

The second patch removes the odd define HAVE_PTE_SPECIAL, which is a
duplicate of CONFIG_ARCH_HAS_PTE_SPECIAL.

There is no functional change introduced by this series.

--
Changes since v2:
 * remove __HAVE_ARCH_PTE_SPECIAL in arch/riscv/include/asm/pgtable-bits.h
 * use IS_ENABLED() instead of #ifdef blocks in patch 2

Laurent Dufour (2):
  mm: introduce ARCH_HAS_PTE_SPECIAL
  mm: remove odd HAVE_PTE_SPECIAL

 .../features/vm/pte_special/arch-support.txt  |  2 +-
 arch/arc/Kconfig  |  1 +
 arch/arc/include/asm/pgtable.h|  2 --
 arch/arm/Kconfig  |  1 +
 arch/arm/include/asm/pgtable-3level.h |  1 -
 arch/arm64/Kconfig|  1 +
 arch/arm64/include/asm/pgtable.h  |  2 --
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  3 ---
 arch/powerpc/include/asm/pte-common.h |  3 ---
 arch/riscv/Kconfig|  1 +
 arch/riscv/include/asm/pgtable-bits.h |  3 ---
 arch/s390/Kconfig |  1 +
 arch/s390/include/asm/pgtable.h   |  1 -
 arch/sh/Kconfig   |  1 +
 arch/sh/include/asm/pgtable.h |  2 --
 arch/sparc/Kconfig|  1 +
 arch/sparc/include/asm/pgtable_64.h   |  3 ---
 arch/x86/Kconfig  |  1 +
 arch/x86/include/asm/pgtable_types.h  |  1 -
 include/linux/pfn_t.h |  4 ++--
 mm/Kconfig|  3 +++
 mm/gup.c  |  4 ++--
 mm/memory.c   | 19 ---
 24 files changed, 25 insertions(+), 37 deletions(-)

-- 
2.7.4



[PATCH] powerpc/config: powernv_defconfig updates

2018-04-11 Thread Nicholas Piggin
For consideration
* Add PPC_MEMTRACE - Small powernv debugfs driver for getting hardware traces.
* Remove IDE - Deprecated and replaced with ATA.
* Add MEMORY_FAILURE - Machine check exceptions can now drive memory failure.
* Turn on FANOTIFY - This is the current filesystem notification feature.
* Turn on SCOM_DEBUGFS - Handy for hardware/firmware debugging, security risk?
* Remove WIRELESS - We had no drivers included for it.
* Remove RAW - Long deprecated in favour of direct IO.
* Remove floppy, parport, and PS2 input devices - not supported.
* Turn on async SCSI scanning - Let's see what breaks.
* Remove virtio drivers - We're host only.
* Remove PPP - Sorry Paulus.
* Make more things modules, NFS FS, RAM disk, netconsole, MS-DOS fs.

This does not disable FTRACE options, that's just an optimisation the
defconfig target made. make powernv_defconfig adds those to .config.

This results in a significantly smaller vmlinux:

    text    data     bss      dec     hex filename
13121779 5284224 1383776 19789779 12df7d3 vmlinux
12067564 4830754 1336008 18234326 1163bd6 vmlinux.patched

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/configs/powernv_defconfig | 52 +++---
 1 file changed, 21 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/configs/powernv_defconfig 
b/arch/powerpc/configs/powernv_defconfig
index 9e92aa6a52ba..11af6f5f236b 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -38,7 +38,9 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_PARTITION_ADVANCED=y
+CONFIG_SCOM_DEBUGFS=y
 CONFIG_OPAL_PRD=y
+CONFIG_PPC_MEMTRACE=y
 # CONFIG_PPC_PSERIES is not set
 # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
@@ -54,6 +56,8 @@ CONFIG_NUMA=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_HWPOISON_INJECT=m
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_PPC_64K_PAGES=y
 CONFIG_PPC_SUBPAGE_PROT=y
@@ -81,32 +85,27 @@ CONFIG_NET_SCHED=y
 CONFIG_NET_CLS_BPF=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_BPF=m
+CONFIG_DNS_RESOLVER=y
 CONFIG_BPF_JIT=y
+# CONFIG_WIRELESS is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_POWERNV_FLASH=y
-CONFIG_PARPORT=m
-CONFIG_PARPORT_PC=m
-CONFIG_BLK_DEV_FD=m
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM=m
 CONFIG_BLK_DEV_RAM_SIZE=65536
-CONFIG_VIRTIO_BLK=m
 CONFIG_BLK_DEV_NVME=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AMD74XX=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=m
 CONFIG_BLK_DEV_SR=y
 CONFIG_BLK_DEV_SR_VENDOR=y
 CONFIG_CHR_DEV_SG=y
 CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SCAN_ASYNC=y
 CONFIG_SCSI_FC_ATTRS=y
 CONFIG_SCSI_SRP_ATTRS=y
 CONFIG_SCSI_CXGB3_ISCSI=m
@@ -121,7 +120,6 @@ CONFIG_SCSI_IPR=y
 CONFIG_SCSI_QLA_FC=m
 CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_LPFC=m
-CONFIG_SCSI_VIRTIO=m
 CONFIG_SCSI_DH=y
 CONFIG_SCSI_DH_RDAC=m
 CONFIG_SCSI_DH_ALUA=m
@@ -152,10 +150,9 @@ CONFIG_DUMMY=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_VXLAN=m
-CONFIG_NETCONSOLE=y
+CONFIG_NETCONSOLE=m
 CONFIG_TUN=m
 CONFIG_VETH=m
-CONFIG_VIRTIO_NET=m
 CONFIG_VORTEX=m
 CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -175,25 +172,21 @@ CONFIG_MLX4_EN=m
 CONFIG_MYRI10GE=m
 CONFIG_QLGE=m
 CONFIG_NETXEN_NIC=m
-CONFIG_PPP=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPPOE=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
+# CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=m
-CONFIG_INPUT_MISC=y
-# CONFIG_SERIO_SERPORT is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_JSM=m
-CONFIG_VIRTIO_CONSOLE=m
 CONFIG_IPMI_HANDLER=y
 CONFIG_IPMI_DEVICE_INTERFACE=y
 CONFIG_IPMI_POWERNV=y
-CONFIG_RAW_DRIVER=y
-CONFIG_MAX_RAW_DEVS=1024
+# CONFIG_DEVPORT is not set
 CONFIG_I2C_CHARDEV=y
+# CONFIG_PTP_1588_CLOCK is not set
 CONFIG_DRM=y
 CONFIG_DRM_AST=y
 CONFIG_FIRMWARE_EDID=y
@@ -236,8 +229,7 @@ CONFIG_INFINIBAND_SRP=m
 CONFIG_INFINIBAND_ISER=m
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=y
-CONFIG_VIRTIO_PCI=m
-CONFIG_VIRTIO_BALLOON=m
+# CONFIG_VIRTIO_MENU is not set
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -253,12 +245,13 @@ CONFIG_XFS_POSIX_ACL=y
 CONFIG_BTRFS_FS=m
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_OVERLAY_FS=m
 CONFIG_ISO9660_FS=y
 CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=y
+CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
@@ -270,9 +263,9 @@ CONFIG_SQUASHFS_XATTR=y
 CONFIG_SQUASHFS_LZO=y
 CONFIG_SQUASHFS_XZ=y
 CONFIG_PSTORE=y
-CONFIG_NFS_FS=y
+CONFIG_NFS_FS=m
 CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
 CONFIG_NFSD=m
 

[PATCH v2 6/6] fsl_pmc: update device bindings

2018-04-11 Thread Ran Wang
From: Li Yang 

Signed-off-by: Li Yang 
Signed-off-by: Zhao Chenhui 
Signed-off-by: Ran Wang 
---
Changes in v2:
  - new file

 .../devicetree/bindings/powerpc/fsl/pmc.txt|   59 +++
 1 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/Documentation/devicetree/bindings/powerpc/fsl/pmc.txt 
b/Documentation/devicetree/bindings/powerpc/fsl/pmc.txt
index 07256b7..f1f749f 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/pmc.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/pmc.txt
@@ -9,15 +9,20 @@ Properties:
 
   "fsl,mpc8548-pmc" should be listed for any chip whose PMC is
   compatible.  "fsl,mpc8536-pmc" should also be listed for any chip
-  whose PMC is compatible, and implies deep-sleep capability.
+  whose PMC is compatible, and implies deep-sleep capability and
+  wake on user defined packet(wakeup on ARP).
+
+  "fsl,p1022-pmc" should be listed for any chip whose PMC is
+  compatible, and implies lossless Ethernet capability during sleep.
 
   "fsl,mpc8641d-pmc" should be listed for any chip whose PMC is
   compatible; all statements below that apply to "fsl,mpc8548-pmc" also
   apply to "fsl,mpc8641d-pmc".
 
   Compatibility does not include bit assignments in SCCR/PMCDR/DEVDISR; these
-  bit assignments are indicated via the sleep specifier in each device's
-  sleep property.
+  bit assignments are indicated via the clock nodes.  Device which has a
+  controllable clock source should have a "fsl,pmc-handle" property pointing
+  to the clock node.
 
 - reg: For devices compatible with "fsl,mpc8349-pmc", the first resource
   is the PMC block, and the second resource is the Clock Configuration
@@ -33,31 +38,35 @@ Properties:
   this is a phandle to an "fsl,gtm" node on which timer 4 can be used as
   a wakeup source from deep sleep.
 
-Sleep specifiers:
+Clock nodes:
+The clock nodes are to describe the masks in PM controller registers for each
+soc clock.
+- fsl,pmcdr-mask: For "fsl,mpc8548-pmc"-compatible devices, the mask will be
+  ORed into PMCDR before suspend if the device using this clock is the wake-up
+  source and need to be running during low power mode; clear the mask if
+  otherwise.
 
-  fsl,mpc8349-pmc: Sleep specifiers consist of one cell.  For each bit
-  that is set in the cell, the corresponding bit in SCCR will be saved
-  and cleared on suspend, and restored on resume.  This sleep controller
-  supports disabling and resuming devices at any time.
+- fsl,sccr-mask: For "fsl,mpc8349-pmc"-compatible devices, the corresponding
+  bit specified by the mask in SCCR will be saved and cleared on suspend, and
+  restored on resume.
 
-  fsl,mpc8536-pmc: Sleep specifiers consist of three cells, the third of
-  which will be ORed into PMCDR upon suspend, and cleared from PMCDR
-  upon resume.  The first two cells are as described for fsl,mpc8578-pmc.
-  This sleep controller only supports disabling devices during system
-  sleep, or permanently.
-
-  fsl,mpc8548-pmc: Sleep specifiers consist of one or two cells, the
-  first of which will be ORed into DEVDISR (and the second into
-  DEVDISR2, if present -- this cell should be zero or absent if the
-  hardware does not have DEVDISR2) upon a request for permanent device
-  disabling.  This sleep controller does not support configuring devices
-  to disable during system sleep (unless supported by another compatible
-  match), or dynamically.
+- fsl,devdisr-mask: Contain one or two cells, depending on the availability of
+  DEVDISR2 register.  For compatible devices, the mask will be ORed into 
DEVDISR
+  or DEVDISR2 when the clock should be permenently disabled.
 
 Example:
 
-   power@b00 {
-   compatible = "fsl,mpc8313-pmc", "fsl,mpc8349-pmc";
-   reg = <0xb00 0x100 0xa00 0x100>;
-   interrupts = <80 8>;
+   power@e0070 {
+   compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc";
+   reg = <0xe0070 0x20>;
+
+   etsec1_clk: soc-clk@24 {
+   fsl,pmcdr-mask = <0x0080>;
+   };
+   etsec2_clk: soc-clk@25 {
+   fsl,pmcdr-mask = <0x0040>;
+   };
+   etsec3_clk: soc-clk@26 {
+   fsl,pmcdr-mask = <0x0020>;
+   };
};
-- 
1.7.1



[PATCH v2 4/6] powerpc/pm: add sleep and deep sleep on QorIQ SoCs

2018-04-11 Thread Ran Wang
In sleep mode, the clocks of the CPU core and unused IP blocks are turned
off (IP blocks allowed to wake up the system keep running).

Some QorIQ SoCs, like MPC8536, P1022 and T104x, have a deep sleep PM mode
in addition to the sleep PM mode. In deep sleep mode, the power supply is
additionally removed from the CPU core and most IP blocks. Only the blocks
needed to wake the chip up out of deep sleep remain ON.

This feature supports 32-bit and 36-bit address space.

The sleep mode is equal to the Standby state in Linux. The deep sleep
mode is equal to the Suspend-to-RAM state of Linux Power Management.
Command to enter sleep mode.
echo standby > /sys/power/state
Command to enter deep sleep mode.
echo mem > /sys/power/state

Signed-off-by: Dave Liu 
Signed-off-by: Li Yang 
Signed-off-by: Jin Qing 
Signed-off-by: Jerry Huang 
Signed-off-by: Ramneek Mehresh 
Signed-off-by: Zhao Chenhui 
Signed-off-by: Wang Dongsheng 
Signed-off-by: Tang Yuantian 
Signed-off-by: Xie Xiaobo 
Signed-off-by: Zhao Qiang 
Signed-off-by: Shengzhou Liu 
Signed-off-by: Ran Wang 
---
Changes in v2:
  - Resolve warnings from scripts/checkpatch.pl

 arch/powerpc/include/asm/cacheflush.h |7 +
 arch/powerpc/include/asm/fsl_pm.h |   31 +
 arch/powerpc/kernel/Makefile  |1 +
 arch/powerpc/kernel/fsl_booke_entry_mapping.S |   10 +
 arch/powerpc/kernel/fsl_pm.c  |   49 +
 arch/powerpc/kernel/head_64.S |2 +-
 arch/powerpc/platforms/85xx/Kconfig   |6 +
 arch/powerpc/platforms/85xx/Makefile  |2 +
 arch/powerpc/platforms/85xx/deepsleep.c   |  349 
 arch/powerpc/platforms/85xx/qoriq_pm.c|  222 +
 arch/powerpc/platforms/85xx/sleep.S   | 1192 +
 arch/powerpc/platforms/86xx/Kconfig   |1 +
 arch/powerpc/sysdev/fsl_pmc.c |  176 -
 arch/powerpc/sysdev/fsl_soc.c |   31 +
 arch/powerpc/sysdev/fsl_soc.h |   18 +
 15 files changed, 2077 insertions(+), 20 deletions(-)
 create mode 100644 arch/powerpc/kernel/fsl_pm.c
 create mode 100644 arch/powerpc/platforms/85xx/deepsleep.c
 create mode 100644 arch/powerpc/platforms/85xx/qoriq_pm.c
 create mode 100644 arch/powerpc/platforms/85xx/sleep.S

diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index b77f036..a5411af 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -31,6 +31,13 @@
 #define flush_dcache_mmap_lock(mapping)do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)  do { } while (0)
 
+extern void __flush_disable_L1(void);
+#ifdef CONFIG_FSL_SOC_BOOKE
+extern void flush_dcache_L1(void);
+#else
+#define flush_dcache_L1()  do { } while (0)
+#endif
+
 extern void flush_icache_range(unsigned long, unsigned long);
 extern void flush_icache_user_range(struct vm_area_struct *vma,
struct page *page, unsigned long addr,
diff --git a/arch/powerpc/include/asm/fsl_pm.h 
b/arch/powerpc/include/asm/fsl_pm.h
index 47df55e..510e5d2 100644
--- a/arch/powerpc/include/asm/fsl_pm.h
+++ b/arch/powerpc/include/asm/fsl_pm.h
@@ -11,6 +11,9 @@
 #ifndef __PPC_FSL_PM_H
 #define __PPC_FSL_PM_H
 
+#ifndef __ASSEMBLY__
+#include 
+
 #define E500_PM_PH10   1
 #define E500_PM_PH15   2
 #define E500_PM_PH20   3
@@ -46,6 +49,34 @@ struct fsl_pm_ops {
 
 extern const struct fsl_pm_ops *qoriq_pm_ops;
 
+struct fsm_reg_vals {
+   u32 offset;
+   u32 value;
+};
+
+void fsl_fsm_setup(void __iomem *base, struct fsm_reg_vals *val);
+void fsl_epu_setup_default(void __iomem *epu_base);
+void fsl_npc_setup_default(void __iomem *npc_base);
+void fsl_fsm_clean(void __iomem *base, struct fsm_reg_vals *val);
+void fsl_epu_clean_default(void __iomem *epu_base);
+
+extern int fsl_dp_iomap(void);
+extern void fsl_dp_iounmap(void);
+
+extern int fsl_enter_epu_deepsleep(void);
+extern void fsl_dp_enter_low(void __iomem *ccsr_base, void __iomem *dcsr_base,
+void __iomem *pld_base, int pld_flag);
+extern void fsl_booke_deep_sleep_resume(void);
+
 int __init fsl_rcpm_init(void);
 
+void set_pm_suspend_state(suspend_state_t state);
+suspend_state_t pm_suspend_state(void);
+
+void fsl_set_power_except(struct device *dev, int on);
+#endif /* __ASSEMBLY__ */
+
+#define T1040QDS_TETRA_FLAG1
+#define T104xRDB_CPLD_FLAG 2
+
 #endif /* __PPC_FSL_PM_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2358f97..e736ea0 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ 

[PATCH v2 5/6] powerpc:dts:pm: add power management node

2018-04-11 Thread Ran Wang
Enable Power Management feature on device tree, including MPC8536,
MPC8544, MPC8548, MPC8572, P1010, P1020, P1021, P1022, P2020, P2041,
P3041, T104X, T1024.

Signed-off-by: Zhao Chenhui 
Signed-off-by: Ran Wang 
---
Changes in v2:
  - no change

 arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi |   14 ++-
 arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi |2 +
 arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi |2 +
 arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi |2 +
 arch/powerpc/boot/dts/fsl/p1010si-post.dtsi   |8 
 arch/powerpc/boot/dts/fsl/p1020si-post.dtsi   |5 +++
 arch/powerpc/boot/dts/fsl/p1021si-post.dtsi   |5 +++
 arch/powerpc/boot/dts/fsl/p1022si-post.dtsi   |9 +++--
 arch/powerpc/boot/dts/fsl/p2020si-post.dtsi   |   14 +++
 arch/powerpc/boot/dts/fsl/pq3-power.dtsi  |   48 +
 arch/powerpc/boot/dts/fsl/t1024rdb.dts|2 +-
 arch/powerpc/boot/dts/fsl/t1040rdb.dts|2 +-
 arch/powerpc/boot/dts/fsl/t1042rdb.dts|2 +-
 arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts |2 +-
 14 files changed, 108 insertions(+), 9 deletions(-)
 create mode 100644 arch/powerpc/boot/dts/fsl/pq3-power.dtsi

diff --git a/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi 
b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi
index 4193570..fba40a1 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi
@@ -199,6 +199,10 @@
 
 /include/ "pq3-dma-0.dtsi"
 /include/ "pq3-etsec1-0.dtsi"
+   enet0: ethernet@24000 {
+   fsl,wake-on-filer;
+   fsl,pmc-handle = <_clk>;
+   };
 /include/ "pq3-etsec1-timer-0.dtsi"
 
usb@22000 {
@@ -222,9 +226,10 @@
};
 
 /include/ "pq3-etsec1-2.dtsi"
-
-   ethernet@26000 {
+   enet2: ethernet@26000 {
cell-index = <1>;
+   fsl,wake-on-filer;
+   fsl,pmc-handle = <_clk>;
};
 
usb@2b000 {
@@ -249,4 +254,9 @@
reg = <0xe 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
+   power@e0070 {
+   compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc";
+   };
 };
diff --git a/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi 
b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi
index b68eb11..ea7416a 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi
@@ -188,4 +188,6 @@
reg = <0xe 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi 
b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi
index 579d76c..dddb737 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi
@@ -156,4 +156,6 @@
reg = <0xe 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi 
b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi
index 49294cf..40a6cff 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi
@@ -193,4 +193,6 @@
reg = <0xe 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi 
b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
index 1b4aafc..47b62a8 100644
--- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
@@ -173,6 +173,8 @@
 
 /include/ "pq3-etsec2-0.dtsi"
enet0: ethernet@b {
+   fsl,pmc-handle = <_clk>;
+
queue-group@b {
fsl,rx-bit-map = <0xff>;
fsl,tx-bit-map = <0xff>;
@@ -181,6 +183,8 @@
 
 /include/ "pq3-etsec2-1.dtsi"
enet1: ethernet@b1000 {
+   fsl,pmc-handle = <_clk>;
+
queue-group@b1000 {
fsl,rx-bit-map = <0xff>;
fsl,tx-bit-map = <0xff>;
@@ -189,6 +193,8 @@
 
 /include/ "pq3-etsec2-2.dtsi"
enet2: ethernet@b2000 {
+   fsl,pmc-handle = <_clk>;
+
queue-group@b2000 {
fsl,rx-bit-map = <0xff>;
fsl,tx-bit-map = <0xff>;
@@ -201,4 +207,6 @@
reg = <0xe 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi 
b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi
index 642dc3a..cc4c746 100644
--- a/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi
@@ -163,14 +163,17 @@
 
 /include/ "pq3-etsec2-0.dtsi"
enet0: enet0_grp2: ethernet@b {
+   fsl,pmc-handle = <_clk>;
};
 
 /include/ "pq3-etsec2-1.dtsi"
enet1: enet1_grp2: ethernet@b1000 

[PATCH v2 3/6] powerpc/cache: add cache flush operation for various e500

2018-04-11 Thread Ran Wang
Various e500 cores have different cache architectures, so they
need different cache flush operations. Therefore, add a callback
function cpu_flush_caches to the struct cpu_spec. The cache flush
operation for the specific kind of e500 is selected at init time.
The callback function will flush all caches in the current cpu.

Signed-off-by: Chenhui Zhao 
Reviewed-by: Yang Li 
Reviewed-by: Jose Rivera 
Signed-off-by: Ran Wang 
---
Changes in v2:
  - no change

 arch/powerpc/include/asm/cputable.h   |   12 
 arch/powerpc/kernel/asm-offsets.c |3 +
 arch/powerpc/kernel/cpu_setup_fsl_booke.S |   81 +
 arch/powerpc/kernel/cputable.c|4 ++
 4 files changed, 100 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index 2e2bacb..d04c46d 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -44,6 +44,14 @@ enum powerpc_pmc_type {
 extern int machine_check_e500(struct pt_regs *regs);
 extern int machine_check_e200(struct pt_regs *regs);
 extern int machine_check_47x(struct pt_regs *regs);
+
+#if defined(CONFIG_E500) || defined(CONFIG_PPC_E500MC)
+extern void __flush_caches_e500v2(void);
+extern void __flush_caches_e500mc(void);
+extern void __flush_caches_e5500(void);
+extern void __flush_caches_e6500(void);
+#endif
+
 int machine_check_8xx(struct pt_regs *regs);
 
 extern void cpu_down_flush_e500v2(void);
@@ -70,6 +78,10 @@ struct cpu_spec {
/* flush caches inside the current cpu */
void (*cpu_down_flush)(void);
 
+#if defined(CONFIG_E500) || defined(CONFIG_PPC_E500MC)
+   /* flush caches of the cpu which is running the function */
+   void (*cpu_flush_caches)(void);
+#endif
/* number of performance monitor counters */
unsigned intnum_pmcs;
enum powerpc_pmc_type pmc_type;
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index ea5eb91..cb4b869 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -351,6 +351,9 @@ int main(void)
OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features);
OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup);
OFFSET(CPU_SPEC_RESTORE, cpu_spec, cpu_restore);
+#if defined(CONFIG_E500) || defined(CONFIG_PPC_E500MC)
+   OFFSET(CPU_FLUSH_CACHES, cpu_spec, cpu_flush_caches);
+#endif
 
OFFSET(pbe_address, pbe, address);
OFFSET(pbe_orig_address, pbe, orig_address);
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S 
b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 462aed9..e94eb41 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -345,3 +345,84 @@ _GLOBAL(cpu_down_flush_e5500)
 /* L1 Data Cache of e6500 contains no modified data, no flush is required */
 _GLOBAL(cpu_down_flush_e6500)
blr
+
+_GLOBAL(__flush_caches_e500v2)
+   mflr r0
+   bl  flush_dcache_L1
+   mtlr r0
+   blr
+
+_GLOBAL(__flush_caches_e500mc)
+_GLOBAL(__flush_caches_e5500)
+   mflr r0
+   bl  flush_dcache_L1
+   bl  flush_backside_L2_cache
+   mtlr r0
+   blr
+
+/* L1 Data Cache of e6500 contains no modified data, no flush is required */
+_GLOBAL(__flush_caches_e6500)
+   blr
+
+   /* r3 = virtual address of L2 controller, WIMG = 01xx */
+_GLOBAL(flush_disable_L2)
+   /* It's a write-through cache, so only invalidation is needed. */
+   mbar
+   isync
+   lwz r4, 0(r3)
+   li  r5, 1
+   rlwimi  r4, r5, 30, 0xc000
+   stw r4, 0(r3)
+
+   /* Wait for the invalidate to finish */
+1: lwz r4, 0(r3)
+   andis.  r4, r4, 0x4000
+   bne 1b
+   mbar
+
+   blr
+
+   /* r3 = virtual address of L2 controller, WIMG = 01xx */
+_GLOBAL(invalidate_enable_L2)
+   mbar
+   isync
+   lwz r4, 0(r3)
+   li  r5, 3
+   rlwimi  r4, r5, 30, 0xc000
+   stw r4, 0(r3)
+
+   /* Wait for the invalidate to finish */
+1: lwz r4, 0(r3)
+   andis.  r4, r4, 0x4000
+   bne 1b
+   mbar
+
+   blr
+
+/* Flush L1 d-cache, invalidate and disable d-cache and i-cache */
+_GLOBAL(__flush_disable_L1)
+   mflrr10
+   bl  flush_dcache_L1 /* Flush L1 d-cache */
+   mtlrr10
+
+   mfspr   r4, SPRN_L1CSR0 /* Invalidate and disable d-cache */
+   li  r5, 2
+   rlwimi  r4, r5, 0, 3
+
+   msync
+   isync
+   mtspr   SPRN_L1CSR0, r4
+   isync
+
+1: mfspr   r4, SPRN_L1CSR0 /* Wait for the invalidate to finish */
+   andi.   r4, r4, 2
+   bne 1b
+
+   mfspr   r4, SPRN_L1CSR1 /* Invalidate and disable i-cache */
+   li  r5, 2
+   rlwimi  r4, r5, 0, 3
+
+   mtspr   SPRN_L1CSR1, r4
+   isync
+
+   blr
diff --git 

[PATCH v2 2/6] drivers/soc/fsl: add EPU FSM configuration for deep sleep

2018-04-11 Thread Ran Wang
In the last stage of deep sleep, software will trigger a Finite
State Machine (FSM) to control the hardware procedure, such as
board isolation, killing PLLs, removing power, and so on.

When the system is woken up by an interrupt, the FSM controls
the hardware to complete the early resume procedure.

This patch configures the EPU FSM in preparation for deep sleep.

Signed-off-by: Hongbo Zhang 
Signed-off-by: Chenhui Zhao 
Signed-off-by: Ran Wang 
---
Changes in v2:
  - Resolve warnings from scripts/checkpatch.pl

 drivers/soc/fsl/Kconfig |7 +
 drivers/soc/fsl/Makefile|1 +
 drivers/soc/fsl/sleep_fsm.c |  279 +++
 drivers/soc/fsl/sleep_fsm.h |  130 
 4 files changed, 417 insertions(+), 0 deletions(-)
 create mode 100644 drivers/soc/fsl/sleep_fsm.c
 create mode 100644 drivers/soc/fsl/sleep_fsm.h

diff --git a/drivers/soc/fsl/Kconfig b/drivers/soc/fsl/Kconfig
index 7a9fb9b..4222bd5 100644
--- a/drivers/soc/fsl/Kconfig
+++ b/drivers/soc/fsl/Kconfig
@@ -16,3 +16,10 @@ config FSL_GUTS
  Initially only reading SVR and registering soc device are supported.
  Other guts accesses, such as reading RCW, should eventually be moved
  into this driver as well.
+
+config FSL_SLEEP_FSM
+   bool
+   help
+ This driver configures a hardware FSM (Finite State Machine) for deep 
sleep.
+ The FSM is used to finish clean-ups at the last stage of system 
entering deep
+ sleep, and also wakes up system when a wake up event happens.
diff --git a/drivers/soc/fsl/Makefile b/drivers/soc/fsl/Makefile
index 44b3beb..28c38c3 100644
--- a/drivers/soc/fsl/Makefile
+++ b/drivers/soc/fsl/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_FSL_DPAA) += qbman/
 obj-$(CONFIG_QUICC_ENGINE) += qe/
 obj-$(CONFIG_CPM)  += qe/
 obj-$(CONFIG_FSL_GUTS) += guts.o
+obj-$(CONFIG_FSL_SLEEP_FSM)+= sleep_fsm.o
diff --git a/drivers/soc/fsl/sleep_fsm.c b/drivers/soc/fsl/sleep_fsm.c
new file mode 100644
index 000..a303098
--- /dev/null
+++ b/drivers/soc/fsl/sleep_fsm.c
@@ -0,0 +1,279 @@
+/*
+ * deep sleep FSM (finite-state machine) configuration
+ *
+ * Copyright 2018 NXP
+ *
+ * Author: Hongbo Zhang 
+ * Chenhui Zhao 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ *  notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *  notice, this list of conditions and the following disclaimer in the
+ *  documentation and/or other materials provided with the distribution.
+ * * Neither the name of the above-listed copyright holders nor the
+ *  names of any contributors may be used to endorse or promote products
+ *  derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+
+#include "sleep_fsm.h"
+/*
+ * These values are from chip's reference manual. For example,
+ * the values for T1040 can be found in "8.4.3.8 Programming
+ * supporting deep sleep mode" of Chapter 8 "Run Control and
+ * Power Management (RCPM)".
+ * The default value can be applied to T104x, LS1021.
+ */
+struct fsm_reg_vals epu_default_val[] = {
+   /* EPGCR (Event Processor Global Control Register) */
+   {EPGCR, 0},
+   /* EPECR (Event Processor Event Control Registers) */
+   {EPECR0 + EPECR_STRIDE * 0, 0},
+   {EPECR0 + EPECR_STRIDE * 1, 0},
+   {EPECR0 + EPECR_STRIDE * 2, 0xF0004004},
+   {EPECR0 + EPECR_STRIDE * 3, 0x8084},
+   {EPECR0 + EPECR_STRIDE * 4, 0x2084},
+ 

[PATCH v2 1/6] powerpc/pm: Fix suspend=n in menuconfig for e500mc platforms.

2018-04-11 Thread Ran Wang
Also, unselect FSL_PMC, which is for older platforms instead.

Signed-off-by: Ran Wang 
---
Changes in v2:
  - no change

 arch/powerpc/Kconfig |4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 73ce5dd..ed60c83 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -316,7 +316,7 @@ config ARCH_HIBERNATION_POSSIBLE
 config ARCH_SUSPEND_POSSIBLE
def_bool y
depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \
-  (PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \
+  FSL_SOC_BOOKE || PPC_86xx || PPC_PSERIES \
   || 44x || 40x
 
 config PPC_DCR_NATIVE
@@ -940,8 +940,6 @@ config FSL_PCI
 
 config FSL_PMC
bool
-   default y
-   depends on SUSPEND && (PPC_85xx || PPC_86xx)
help
  Freescale MPC85xx/MPC86xx power management controller support
  (suspend/resume). For MPC83xx see platforms/83xx/suspend.c
-- 
1.7.1



[PATCH 2/2] powernv/npu: Prevent overwriting of pnv_npu2_init_context() callback parameters

2018-04-11 Thread Alistair Popple
There is a single npu context per set of callback parameters. Callers
should be prevented from overwriting existing callback values so instead
return an error if different parameters are passed.

Signed-off-by: Alistair Popple 
---
 arch/powerpc/include/asm/powernv.h   |  2 +-
 arch/powerpc/platforms/powernv/npu-dma.c | 16 +---
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/powernv.h 
b/arch/powerpc/include/asm/powernv.h
index dc5f6a5d4575..362ea12a4501 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -15,7 +15,7 @@
 extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
 extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
unsigned long flags,
-   struct npu_context *(*cb)(struct npu_context *, void *),
+   void (*cb)(struct npu_context *, void *),
void *priv);
 extern void pnv_npu2_destroy_context(struct npu_context *context,
struct pci_dev *gpdev);
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
b/arch/powerpc/platforms/powernv/npu-dma.c
index cb77162f4e7a..193f43ea3fbc 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -407,7 +407,7 @@ struct npu_context {
bool nmmu_flush;
 
/* Callback to stop translation requests on a given GPU */
-   struct npu_context *(*release_cb)(struct npu_context *, void *);
+   void (*release_cb)(struct npu_context *context, void *priv);
 
/*
 * Private pointer passed to the above callback for usage by
@@ -705,7 +705,7 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = 
{
  */
 struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
unsigned long flags,
-   struct npu_context *(*cb)(struct npu_context *, void *),
+   void (*cb)(struct npu_context *, void *),
void *priv)
 {
int rc;
@@ -763,8 +763,18 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev 
*gpdev,
 */
spin_lock(&npu_context_lock);
npu_context = mm->context.npu_context;
-   if (npu_context)
+   if (npu_context) {
+   if (npu_context->release_cb != cb ||
+   npu_context->priv != priv) {
+   spin_unlock(&npu_context_lock);
+   opal_npu_destroy_context(nphb->opal_id, mm->context.id,
+   PCI_DEVID(gpdev->bus->number,
+   gpdev->devfn));
+   return ERR_PTR(-EINVAL);
+   }
+
WARN_ON(!kref_get_unless_zero(&npu_context->kref));
+   }
spin_unlock(&npu_context_lock);
 
if (!npu_context) {
-- 
2.11.0



[PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy

2018-04-11 Thread Alistair Popple
The pnv_npu2_init_context() and pnv_npu2_destroy_context() functions are
used to allocate/free contexts to allow address translation and shootdown
by the NPU on a particular GPU. Context initialisation is implicitly safe
as it is protected by the requirement mmap_sem be held in write mode,
however pnv_npu2_destroy_context() does not require mmap_sem to be held and
it is not safe to call with a concurrent initialisation for a different
GPU.

It was assumed the driver would ensure destruction was not called
concurrently with initialisation. However the driver may be simplified by
allowing concurrent initialisation and destruction for different GPUs. As
npu context creation/destruction is not a performance critical path and the
critical section is not large, a single spinlock is used for simplicity.

Signed-off-by: Alistair Popple 
---
 arch/powerpc/platforms/powernv/npu-dma.c | 51 ++--
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
b/arch/powerpc/platforms/powernv/npu-dma.c
index 1cbef1f9cd37..cb77162f4e7a 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -34,6 +34,12 @@
 #define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
 
 /*
+ * spinlock to protect initialisation of an npu_context for a particular
+ * mm_struct.
+ */
+DEFINE_SPINLOCK(npu_context_lock);
+
+/*
  * Other types of TCE cache invalidation are not functional in the
  * hardware.
  */
@@ -694,7 +700,8 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = 
{
  * Returns an error if there no contexts are currently available or a
  * npu_context which should be passed to pnv_npu2_handle_fault().
  *
- * mmap_sem must be held in write mode.
+ * mmap_sem must be held in write mode and must not be called from interrupt
+ * context.
  */
 struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
unsigned long flags,
@@ -741,7 +748,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev 
*gpdev,
/*
 * Setup the NPU context table for a particular GPU. These need to be
 * per-GPU as we need the tables to filter ATSDs when there are no
-* active contexts on a particular GPU.
+* active contexts on a particular GPU. It is safe for these to be
+* called concurrently with destroy as the OPAL call takes appropriate
+* locks and refcounts on init/destroy.
 */
rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
PCI_DEVID(gpdev->bus->number, gpdev->devfn));
@@ -752,8 +761,19 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev 
*gpdev,
 * We store the npu pci device so we can more easily get at the
 * associated npus.
 */
+   spin_lock(&npu_context_lock);
npu_context = mm->context.npu_context;
+   if (npu_context)
+   WARN_ON(!kref_get_unless_zero(&npu_context->kref));
+   spin_unlock(&npu_context_lock);
+
if (!npu_context) {
+   /*
+* We can set up these fields without holding the
+* npu_context_lock as the npu_context hasn't been returned to
+* the caller meaning it can't be destroyed. Parallel allocation
+* is protected against by mmap_sem.
+*/
rc = -ENOMEM;
npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
if (npu_context) {
@@ -772,8 +792,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev 
*gpdev,
}
 
mm->context.npu_context = npu_context;
-   } else {
-   WARN_ON(!kref_get_unless_zero(&npu_context->kref));
}
 
npu_context->release_cb = cb;
@@ -811,15 +829,16 @@ static void pnv_npu2_release_context(struct kref *kref)
mm_context_remove_copro(npu_context->mm);
 
npu_context->mm->context.npu_context = NULL;
-   mmu_notifier_unregister(&npu_context->mn,
-   npu_context->mm);
-
-   kfree(npu_context);
 }
 
+/*
+ * Destroy a context on the given GPU. May free the npu_context if it is no
+ * longer active on any GPUs. Must not be called from interrupt context.
+ */
 void pnv_npu2_destroy_context(struct npu_context *npu_context,
struct pci_dev *gpdev)
 {
+   int removed;
struct pnv_phb *nphb;
struct npu *npu;
struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
@@ -841,7 +860,21 @@ void pnv_npu2_destroy_context(struct npu_context 
*npu_context,
WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
PCI_DEVID(gpdev->bus->number, gpdev->devfn));
-   kref_put(&npu_context->kref, pnv_npu2_release_context);
+