[PATCH v2 1/4] powerpc: Remove flush_instruction_cache for book3s/32

2020-08-13 Thread Christophe Leroy
The only callers of flush_instruction_cache() are:

arch/powerpc/kernel/swsusp_booke.S: bl flush_instruction_cache
arch/powerpc/mm/nohash/40x.c:   flush_instruction_cache();
arch/powerpc/mm/nohash/44x.c:   flush_instruction_cache();
arch/powerpc/mm/nohash/fsl_booke.c: flush_instruction_cache();
arch/powerpc/platforms/44x/machine_check.c: 
flush_instruction_cache();
arch/powerpc/platforms/44x/machine_check.c: 
flush_instruction_cache();

This function is not used by book3s/32, drop it.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/misc_32.S | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index b24f866fef81..5c074c2ff5b5 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -271,9 +271,8 @@ _ASM_NOKPROBE_SYMBOL(real_writeb)
 
 /*
  * Flush instruction cache.
- * This is a no-op on the 601.
  */
-#ifndef CONFIG_PPC_8xx
+#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32)
 _GLOBAL(flush_instruction_cache)
 #if defined(CONFIG_4xx)
lis r3, KERNELBASE@h
@@ -290,18 +289,11 @@ _GLOBAL(flush_instruction_cache)
mfspr   r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr   SPRN_L1CSR1,r3
-#elif defined(CONFIG_PPC_BOOK3S_601)
-   blr /* for 601, do nothing */
-#else
-   /* 603/604 processor - use invalidate-all bit in HID0 */
-   mfspr   r3,SPRN_HID0
-   ori r3,r3,HID0_ICFI
-   mtspr   SPRN_HID0,r3
 #endif /* CONFIG_4xx */
isync
blr
 EXPORT_SYMBOL(flush_instruction_cache)
-#endif /* CONFIG_PPC_8xx */
+#endif
 
 /*
  * Copy a whole page.  We use the dcbz instruction on the destination
-- 
2.25.0



[PATCH v2 3/4] powerpc: Rewrite 4xx flush_instruction_cache() in C

2020-08-13 Thread Christophe Leroy
Nothing prevents flush_instruction_cache() from being written in C.

Do it to improve readability and maintainability.

This function is very small and isn't called from assembly,
make it static inline in asm/cacheflush.h

Signed-off-by: Christophe Leroy 
---
v2: Written as a static inline instead of adding a new C file for this function 
alone.
---
 arch/powerpc/include/asm/cacheflush.h | 8 
 arch/powerpc/kernel/misc_32.S | 7 +--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index 481877879fec..138e46d8c04e 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -98,7 +98,15 @@ static inline void invalidate_dcache_range(unsigned long 
start,
mb();   /* sync */
 }
 
+#ifdef CONFIG_4xx
+static inline void flush_instruction_cache(void)
+{
+   iccci((void *)KERNELBASE);
+   isync();
+}
+#else
 void flush_instruction_cache(void);
+#endif
 
 #include 
 
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 5c074c2ff5b5..1bda207459a8 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -272,12 +272,8 @@ _ASM_NOKPROBE_SYMBOL(real_writeb)
 /*
  * Flush instruction cache.
  */
-#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32)
+#ifdef CONFIG_FSL_BOOKE
 _GLOBAL(flush_instruction_cache)
-#if defined(CONFIG_4xx)
-   lis r3, KERNELBASE@h
-   iccci   0,r3
-#elif defined(CONFIG_FSL_BOOKE)
 #ifdef CONFIG_E200
mfspr   r3,SPRN_L1CSR0
ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
@@ -289,7 +285,6 @@ _GLOBAL(flush_instruction_cache)
mfspr   r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr   SPRN_L1CSR1,r3
-#endif /* CONFIG_4xx */
isync
blr
 EXPORT_SYMBOL(flush_instruction_cache)
-- 
2.25.0



Re: [PATCH 1/5] powerpc: Remove flush_instruction_cache for book3s/32

2020-08-13 Thread Christophe Leroy




Le 13/08/2020 à 14:14, Christoph Hellwig a écrit :

On Thu, Aug 13, 2020 at 01:13:08PM +0100, Christoph Hellwig wrote:

On Thu, Aug 13, 2020 at 10:12:00AM +, Christophe Leroy wrote:

-#ifndef CONFIG_PPC_8xx
+#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32)
  _GLOBAL(flush_instruction_cache)
  #if defined(CONFIG_4xx)
lis r3, KERNELBASE@h
@@ -290,18 +289,11 @@ _GLOBAL(flush_instruction_cache)
mfspr   r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr   SPRN_L1CSR1,r3
-#elif defined(CONFIG_PPC_BOOK3S_601)
-   blr /* for 601, do nothing */
-#else
-   /* 603/604 processor - use invalidate-all bit in HID0 */
-   mfspr   r3,SPRN_HID0
-   ori r3,r3,HID0_ICFI
-   mtspr   SPRN_HID0,r3
  #endif /* CONFIG_4xx */
isync
blr
  EXPORT_SYMBOL(flush_instruction_cache)
-#endif /* CONFIG_PPC_8xx */
+#endif /* CONFIG_PPC_8xx || CONFIG_PPC_BOOK3S_32 */


What about untangling this into entirely separate versions instead
of the ifdef mess?  Also the export does not seem to be needed at all.


Ok, I see that you do that later, sorry.



In v2, I drop the untangling patch, because the series completely 
dismantles flush_instruction_cache() so there is no need for an 
ephemeral untangled version of it.


Christophe


[PATCH v2] powerpc: Remove flush_instruction_cache() on 8xx

2020-08-13 Thread Christophe Leroy
flush_instruction_cache() is never used on 8xx, remove it.

Signed-off-by: Christophe Leroy 
---
v2: Becomes a standalone patch independent of the series dismantling the ASM 
flush_instruction_cache()
---
 arch/powerpc/mm/nohash/8xx.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index d2b37146ae6c..231ca95f9ffb 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -244,13 +244,6 @@ void set_context(unsigned long id, pgd_t *pgd)
mb();
 }
 
-void flush_instruction_cache(void)
-{
-   isync();
-   mtspr(SPRN_IC_CST, IDC_INVALL);
-   isync();
-}
-
 #ifdef CONFIG_PPC_KUEP
 void __init setup_kuep(bool disabled)
 {
-- 
2.25.0



Re: [PATCH v3] powerpc/pseries: explicitly reschedule during drmem_lmb list traversal

2020-08-13 Thread Christophe Leroy




Le 13/08/2020 à 17:11, Nathan Lynch a écrit :

The drmem lmb list can have hundreds of thousands of entries, and
unfortunately lookups take the form of linear searches. As long as
this is the case, traversals have the potential to monopolize the CPU
and provoke lockup reports, workqueue stalls, and the like unless
they explicitly yield.

Rather than placing cond_resched() calls within various
for_each_drmem_lmb() loop blocks in the code, put it in the iteration
expression of the loop macro itself so users can't omit it.

Introduce a drmem_lmb_next() iteration helper function which calls
cond_resched() at a regular interval during array traversal. Each
iteration of the loop in DLPAR code paths can involve around ten RTAS
calls which can each take up to 250us, so this ensures the check is
performed at worst every few milliseconds.

Fixes: 6c6ea53725b3 ("powerpc/mm: Separate ibm, dynamic-memory data from DT 
format")
Signed-off-by: Nathan Lynch 


Looks a lot better to me than v2.

Reviewed-by: Christophe Leroy 


---

Notes:
 Changes since v2:
 * Make drmem_lmb_next() more general.
 * Adjust reschedule interval for better code generation.
 * Add commentary to drmem_lmb_next() to explain the cond_resched()
   call.
 * Remove bounds assertions.
 
 Changes since v1:

 * Add bounds assertions in drmem_lmb_next().
 * Call cond_resched() in the iterator on only every 20th element
   instead of on every iteration, to reduce overhead in tight loops.

  arch/powerpc/include/asm/drmem.h | 18 +-
  1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 17ccc6474ab6..6fb928605ed1 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -8,6 +8,8 @@
  #ifndef _ASM_POWERPC_LMB_H
  #define _ASM_POWERPC_LMB_H
  
+#include <linux/sched.h>

+
  struct drmem_lmb {
u64 base_addr;
u32 drc_index;
@@ -26,8 +28,22 @@ struct drmem_lmb_info {
  
  extern struct drmem_lmb_info *drmem_info;
  
+static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb,

+  const struct drmem_lmb *start)
+{
+   /*
+* DLPAR code paths can take several milliseconds per element
+* when interacting with firmware. Ensure that we don't
+* unfairly monopolize the CPU.
+*/
+   if (((++lmb - start) % 16) == 0)
+   cond_resched();
+
+   return lmb;
+}
+
  #define for_each_drmem_lmb_in_range(lmb, start, end)  \
-   for ((lmb) = (start); (lmb) < (end); (lmb)++)
+   for ((lmb) = (start); (lmb) < (end); lmb = drmem_lmb_next(lmb, start))
  
  #define for_each_drmem_lmb(lmb)	\

for_each_drmem_lmb_in_range((lmb),  \



[PATCH] kernel/watchdog: fix warning -Wunused-variable for watchdog_allowed_mask in ppc64

2020-08-13 Thread Balamuruhan S
In a ppc64 config, if `CONFIG_SOFTLOCKUP_DETECTOR` is not set, the build
warns about an unused declaration of `watchdog_allowed_mask`. Move the
declaration inside the ifdef later in the code.

```
kernel/watchdog.c:47:23: warning: ‘watchdog_allowed_mask’ defined but not used 
[-Wunused-variable]
 static struct cpumask watchdog_allowed_mask __read_mostly;
```

Signed-off-by: Balamuruhan S 
---
 kernel/watchdog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 5abb5b22ad13..33c9b8a3d51b 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -44,7 +44,6 @@ int __read_mostly soft_watchdog_user_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
 static int __read_mostly nmi_watchdog_available;
 
-static struct cpumask watchdog_allowed_mask __read_mostly;
 
 struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -166,6 +165,7 @@ int __read_mostly sysctl_softlockup_all_cpu_backtrace;
 unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
 
+static struct cpumask watchdog_allowed_mask __read_mostly;
 static bool softlockup_initialized __read_mostly;
 static u64 __read_mostly sample_period;
 

base-commit: a3a28c4451dff698d0c7ef5a3e80423aa5774e2b
-- 
2.24.1



[PATCH] powerpc: Add POWER10 raw mode cputable entry

2020-08-13 Thread Madhavan Srinivasan
Add a raw mode cputable entry for POWER10. Copies most of the fields
from commit a3ea40d5c736 ("powerpc: Add POWER10 architected mode")
except for the oprofile_cpu_type, machine_check_early, pvr_mask and pvr_value
fields. On bare metal systems we use DT CPU features, which doesn't need a
cputable entry. But in VMs we still rely on the raw cputable entry to
set the correct values for the PMU related fields.

Signed-off-by: Madhavan Srinivasan 
---
 arch/powerpc/kernel/cputable.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b4066354f0730..1e052f53e5dca 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -541,6 +541,25 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early= __machine_check_early_realmode_p9,
.platform   = "power9",
},
+   {   /* Power10 */
+   .pvr_mask   = 0x,
+   .pvr_value  = 0x0080,
+   .cpu_name   = "POWER10 (raw)",
+   .cpu_features   = CPU_FTRS_POWER10,
+   .cpu_user_features  = COMMON_USER_POWER10,
+   .cpu_user_features2 = COMMON_USER2_POWER10,
+   .mmu_features   = MMU_FTRS_POWER10,
+   .icache_bsize   = 128,
+   .dcache_bsize   = 128,
+   .num_pmcs   = 6,
+   .pmc_type   = PPC_PMC_IBM,
+   .oprofile_cpu_type  = "ppc64/power10",
+   .oprofile_type  = PPC_OPROFILE_INVALID,
+   .cpu_setup  = __setup_cpu_power10,
+   .cpu_restore= __restore_cpu_power10,
+   .machine_check_early= __machine_check_early_realmode_p10,
+   .platform   = "power10",
+   },
{   /* Cell Broadband Engine */
.pvr_mask   = 0x,
.pvr_value  = 0x0070,
-- 
2.26.2



Re: BUG: unable to handle kernel paging request in fl_dump_key

2020-08-13 Thread syzbot
syzbot has bisected this issue to:

commit a51486266c3ba8e035a47fa96df67f274fe0c7d0
Author: Jiri Pirko 
Date:   Sat Jun 15 09:03:49 2019 +

net: sched: remove NET_CLS_IND config option

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=1746350990
start commit:   1ca0fafd tcp: md5: allow changing MD5 keys in all socket s..
git tree:   net
final oops: https://syzkaller.appspot.com/x/report.txt?x=14c6350990
console output: https://syzkaller.appspot.com/x/log.txt?x=10c6350990
kernel config:  https://syzkaller.appspot.com/x/.config?x=bf3aec367b9ab569
dashboard link: https://syzkaller.appspot.com/bug?extid=9c1be56e9317b795e874
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=1062a40b10

Reported-by: syzbot+9c1be56e9317b795e...@syzkaller.appspotmail.com
Fixes: a51486266c3b ("net: sched: remove NET_CLS_IND config option")

For information about bisection process see: https://goo.gl/tpsmEJ#bisection


fsl_espi errors on v5.7.15

2020-08-13 Thread Chris Packham
Hi,

I'm seeing a problem with accessing spi-nor after upgrading a T2081 
based system to linux v5.7.15

For this board u-boot and the u-boot environment live on spi-nor.

When I use fw_setenv from userspace I get the following kernel logs

# fw_setenv foo=1
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but SPIE_DON isn't set!
fsl_espi ffe11.spi: Transfer done but rx/tx fifo's aren't empty!
fsl_espi ffe11.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
fsl_espi ffe11.spi: Transfer done but rx/tx fifo's aren't empty!
fsl_espi ffe11.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
fsl_espi ffe11.spi: Transfer done but rx/tx fifo's aren't empty!
fsl_espi ffe11.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
...

If I run fw_printenv (before getting it into a bad state) it is able to 
display the content of the boards u-boot environment.

I've been unsuccessful in producing a setup for bisecting the issue. I do 
know the issue doesn't occur on the old 4.4.x based kernel but that's 
probably not much help.

Any pointers on what the issue (and/or solution) might be would be appreciated.

Thanks,
Chris


Re: [PATCH] powerpc/papr_scm: Limit the readability of 'perf_stats' sysfs attribute

2020-08-13 Thread Michael Ellerman
"Aneesh Kumar K.V"  writes:
> On 8/13/20 10:04 AM, Vaibhav Jain wrote:
>> The newly introduced 'perf_stats' attribute uses the default access
>> mode of 0444 letting non-root users access performance stats of an
>> nvdimm and potentially force the kernel into issuing large number of
>> expensive HCALLs. Since the information exposed by this attribute
>> cannot be cached, hence it's better to ward off access to this attribute
>> from users who don't need to access these performance statistics.
>> 
>> Hence this patch adds check in perf_stats_show() to only let users
>> that are 'perfmon_capable()' to read the nvdimm performance
>> statistics.
>> 
>> Fixes: 2d02bf835e573 ('powerpc/papr_scm: Fetch nvdimm performance stats from 
>> PHYP')
>> Reported-by: Aneesh Kumar K.V 
>> Signed-off-by: Vaibhav Jain 
>> ---
>>   arch/powerpc/platforms/pseries/papr_scm.c | 4 
>>   1 file changed, 4 insertions(+)
>> 
>> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
>> b/arch/powerpc/platforms/pseries/papr_scm.c
>> index f439f0dfea7d1..36c51bf8af9a8 100644
>> --- a/arch/powerpc/platforms/pseries/papr_scm.c
>> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
>> @@ -792,6 +792,10 @@ static ssize_t perf_stats_show(struct device *dev,
>>  struct nvdimm *dimm = to_nvdimm(dev);
>>  struct papr_scm_priv *p = nvdimm_provider_data(dimm);
>>   
>> +/* Allow access only to perfmon capable users */
>> +if (!perfmon_capable())
>> +return -EACCES;
>> +
>
> An access check is usually done in open(). This is the read callback IIUC.

Yes. Otherwise an unprivileged user can open the file, and then trick a
suid program into reading from it.

cheers


Re: [RFC PATCH 1/2] powerpc/numa: Introduce logical numa id

2020-08-13 Thread Nathan Lynch
Hi Aneesh,

"Aneesh Kumar K.V"  writes:
> "Aneesh Kumar K.V"  writes:
>> On 8/8/20 2:15 AM, Nathan Lynch wrote:
>>> "Aneesh Kumar K.V"  writes:
 On 8/7/20 9:54 AM, Nathan Lynch wrote:
> "Aneesh Kumar K.V"  writes:
>> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
>> index e437a9ac4956..6c659aada55b 100644
>> --- a/arch/powerpc/mm/numa.c
>> +++ b/arch/powerpc/mm/numa.c
>> @@ -221,25 +221,51 @@ static void initialize_distance_lookup_table(int 
>> nid,
>>  }
>>}
>>
>> +static u32 nid_map[MAX_NUMNODES] = {[0 ... MAX_NUMNODES - 1] =  
>> NUMA_NO_NODE};
>
> It's odd to me to use MAX_NUMNODES for this array when it's going to be
> indexed not by Linux's logical node IDs but by the platform-provided
> domain number, which has no relation to MAX_NUMNODES.


 I didn't want to dynamically allocate this. We could fetch
 "ibm,max-associativity-domains" to find the size for that. The current
 code do assume  firmware group id to not exceed MAX_NUMNODES. Hence kept
 the array size to be MAX_NUMNODEs. I do agree that it is confusing. May
 be we can do #define MAX_AFFINITY_DOMAIN MAX_NUMNODES?
>>> 
>>> Well, consider:
>>> 
>>> - ibm,max-associativity-domains can change at runtime with LPM. This
>>>doesn't happen in practice yet, but we should probably start thinking
>>>about how to support that.
>>> - The domain numbering isn't clearly specified to have any particular
>>>properties such as beginning at zero or a contiguous range.
>>> 
>>> While the current code likely contains assumptions contrary to these
>>> points, a change such as this is an opportunity to think about whether
>>> those assumptions can be reduced or removed. In particular I think it
>>> would be good to gracefully degrade when the number of NUMA affinity
>>> domains can exceed MAX_NUMNODES. Using the platform-supplied domain
>>> numbers to directly index Linux data structures will make that
>>> impossible.
>>> 
>>> So, maybe genradix or even xarray wouldn't actually be overengineering
>>> here.
>>> 
>>
>> One of the challenges with such a data structure is that we initialize 
>> the nid_map before the slab is available. This means a memblock based 
>> allocation and we would end up implementing such a sparse data structure 
>> ourselves here.

Yes, good point.


>> As you mentioned above, since we know that hypervisor as of now limits 
>> the max affinity domain id below ibm,max-associativity-domains we are 
>> good with an array-like nid_map we have here. This keeps the code simpler.
>>
>> This will also allow us to switch to a more sparse data structure as you 
>> requested here in the future because the main change that is pushed in 
>> this series is the usage of firmare_group_id_to_nid(). The details of 
>> the data structure we use to keep track of that mapping are pretty much 
>> internal to that function.
>
> How about this? This makes it not a direct index. But it do limit the
> search to max numa node on the system. 
>
> static int domain_id_map[MAX_NUMNODES] = {[0 ... MAX_NUMNODES - 1] =  -1 };
>
> static int __affinity_domain_to_nid(int domain_id, int max_nid)
> {
>   int i;
>
>   for (i = 0; i < max_nid; i++) {
>   if (domain_id_map[i] == domain_id)
>   return i;
>   }
>   return NUMA_NO_NODE;
> }

OK, this indexes the array by Linux's node id, good. I was wondering if
I could persuade you do flip it around like this :-)

Walking through the code below:

> int affinity_domain_to_nid(struct affinity_domain *domain)
> {
>   int nid, domain_id;
>   static int last_nid = 0;
>   static DEFINE_SPINLOCK(node_id_lock);
>
>   domain_id = domain->id;
>   /*
>* For PowerNV we don't change the node id. This helps to avoid
>* confusion w.r.t the expected node ids. On pseries, node numbers
>* are virtualized. Hence do logical node id for pseries.
>*/
>   if (!firmware_has_feature(FW_FEATURE_LPAR))
>   return domain_id;
>
>   if (domain_id ==  -1 || last_nid == MAX_NUMNODES)
>   return NUMA_NO_NODE;
>
>   nid = __affinity_domain_to_nid(domain_id, last_nid);

So this is pseries fast path. Attempt to look up the Linux node for the
given domain, where last_nid is the highest-numbered node in use so
far. If the result is in [0..last_nid] we're done.

>
>   if (nid == NUMA_NO_NODE) {
>   spin_lock(_id_lock);

If the lookup fails enter the critical section. As we discussed offline,
this is a precaution for potentially parallel device probing.

>   /*  recheck with lock held */
>   nid = __affinity_domain_to_nid(domain_id, last_nid);

Attempt the same lookup again. If the result is in [0..last_nid],
another thread has just initialized the mapping for this domain and
we're done.

>   if (nid == NUMA_NO_NODE) {
>   nid = 

Re: [PATCH] sfc_ef100: Fix build failure on powerpc

2020-08-13 Thread Edward Cree
On 13/08/2020 15:39, Christophe Leroy wrote:
> ppc6xx_defconfig fails building sfc.ko module, complaining
> about the lack of the __umoddi3 symbol.
>
> This is due to the following test
>
>   if (EFX_MIN_DMAQ_SIZE % reader->value) {
>
> Because reader->value is u64.
Already fixed in net.git by 41077c990266 ("sfc: fix ef100 design-param 
checking").
But thanks anyway.


Re: [PATCH] arch/powerpc: use simple i2c probe function

2020-08-13 Thread Luca Ceresoli
Hi,

On 07/08/20 17:27, Stephen Kitt wrote:
> The i2c probe functions here don't use the id information provided in
> their second argument, so the single-parameter i2c probe function
> ("probe_new") can be used instead.
> 
> This avoids scanning the identifier tables during probes.
> 
> Signed-off-by: Stephen Kitt 

Reviewed-by: Luca Ceresoli 

-- 
Luca


Re: linux-next: runtime warning in Linus' tree

2020-08-13 Thread Roman Gushchin
On Thu, Aug 13, 2020 at 11:20:33AM -0400, Johannes Weiner wrote:
> On Thu, Aug 13, 2020 at 04:46:54PM +1000, Stephen Rothwell wrote:
> > [0.055220][T0] WARNING: CPU: 0 PID: 0 at mm/memcontrol.c:5220 
> > mem_cgroup_css_alloc+0x350/0x904
> 
> > [The line numbers in the final linux next are 5226 and 5141 due to
> > later patches.]
> > 
> > Introduced (or exposed) by commit
> > 
> >   3e38e0aaca9e ("mm: memcg: charge memcg percpu memory to the parent 
> > cgroup")
> > 
> > This commit actually adds the WARN_ON, so it either adds the bug that
> > sets it off, or the bug already existed.
> > 
> > Unfortunately, the version of this patch in linux-next up until today
> > is different.  :-(
> 
> Sorry, I made a last-minute request to include these checks in that
> patch to make the code a bit more robust, but they trigger a false
> positive here. Let's remove them.
> 
> ---
> 
> From de8ea7c96c056c3cbe7b93995029986a158fb9cd Mon Sep 17 00:00:00 2001
> From: Johannes Weiner 
> Date: Thu, 13 Aug 2020 10:40:54 -0400
> Subject: [PATCH] mm: memcontrol: fix warning when allocating the root cgroup
> 
> Commit 3e38e0aaca9e ("mm: memcg: charge memcg percpu memory to the
> parent cgroup") adds memory tracking to the memcg kernel structures
> themselves to make cgroups liable for the memory they are consuming
> through the allocation of child groups (which can be significant).
> 
> This code is a bit awkward as it's spread out through several
> functions: The outermost function does memalloc_use_memcg(parent) to
> set up current->active_memcg, which designates which cgroup to charge,
> and the inner functions pass GFP_ACCOUNT to request charging for
> specific allocations. To make sure this dependency is satisfied at all
> times - to make sure we don't randomly charge whoever is calling the
> functions - the inner functions warn on !current->active_memcg.
> 
> However, this triggers a false warning when the root memcg itself is
> allocated. No parent exists in this case, and so current->active_memcg
> is rightfully NULL. It's a false positive, not indicative of a bug.
> 
> Delete the warnings for now, we can revisit this later.
> 
> Fixes: 3e38e0aaca9e ("mm: memcg: charge memcg percpu memory to the parent 
> cgroup")
> Signed-off-by: Johannes Weiner 

Acked-by: Roman Gushchin 

Thanks!


> ---
>  mm/memcontrol.c | 6 --
>  1 file changed, 6 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d59fd9af6e63..9d87082e64aa 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5137,9 +5137,6 @@ static int alloc_mem_cgroup_per_node_info(struct 
> mem_cgroup *memcg, int node)
>   if (!pn)
>   return 1;
>  
> - /* We charge the parent cgroup, never the current task */
> - WARN_ON_ONCE(!current->active_memcg);
> -
>   pn->lruvec_stat_local = alloc_percpu_gfp(struct lruvec_stat,
>GFP_KERNEL_ACCOUNT);
>   if (!pn->lruvec_stat_local) {
> @@ -5222,9 +5219,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
>   goto fail;
>   }
>  
> - /* We charge the parent cgroup, never the current task */
> - WARN_ON_ONCE(!current->active_memcg);
> -
>   memcg->vmstats_local = alloc_percpu_gfp(struct memcg_vmstats_percpu,
>   GFP_KERNEL_ACCOUNT);
>   if (!memcg->vmstats_local)
> -- 
> 2.28.0
> 


[PATCH 6/9] powerpc: Remove support for PowerPC 601

2020-08-13 Thread Christophe Leroy
PowerPC 601 has been retired.

Remove all associated specific code.

CPU_FTRS_PPC601 has CPU_FTR_COHERENT_ICACHE and CPU_FTR_COMMON.

CPU_FTR_COMMON is already present via other CPU_FTRS.
None of the remaining CPU selects CPU_FTR_COHERENT_ICACHE.

So CPU_FTRS_PPC601 can be removed from the possible features,
hence can be removed completely.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/boot/util.S| 15 +--
 arch/powerpc/include/asm/cputable.h | 12 ++---
 arch/powerpc/include/asm/ppc_asm.h  |  3 +-
 arch/powerpc/include/asm/ptrace.h   |  4 --
 arch/powerpc/include/asm/time.h |  2 +-
 arch/powerpc/include/asm/timex.h|  3 --
 arch/powerpc/kernel/btext.c |  8 +---
 arch/powerpc/kernel/entry_32.S  | 18 
 arch/powerpc/kernel/head_32.S   | 44 ++
 arch/powerpc/kernel/setup_32.c  |  2 +-
 arch/powerpc/kernel/traps.c |  4 --
 arch/powerpc/kernel/vdso32/datapage.S   |  2 -
 arch/powerpc/kernel/vdso32/vdso32.lds.S |  2 -
 arch/powerpc/mm/book3s32/mmu.c  | 39 +++-
 arch/powerpc/mm/ptdump/bats.c   | 59 -
 arch/powerpc/platforms/powermac/setup.c |  2 +-
 arch/powerpc/platforms/powermac/smp.c   |  4 --
 17 files changed, 17 insertions(+), 206 deletions(-)

diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S
index f11f0589a669..d03cdb7606dc 100644
--- a/arch/powerpc/boot/util.S
+++ b/arch/powerpc/boot/util.S
@@ -18,7 +18,7 @@
 
.text
 
-/* udelay (on non-601 processors) needs to know the period of the
+/* udelay needs to know the period of the
  * timebase in nanoseconds.  This used to be hardcoded to be 60ns
  * (period of 66MHz/4).  Now a variable is used that is initialized to
  * 60 for backward compatibility, but it can be overridden as necessary
@@ -37,19 +37,6 @@ timebase_period_ns:
  */
.globl  udelay
 udelay:
-   mfspr   r4,SPRN_PVR
-   srwir4,r4,16
-   cmpwi   0,r4,1  /* 601 ? */
-   bne .Ludelay_not_601
-00:li  r0,86   /* Instructions / microsecond? */
-   mtctr   r0
-10:addir0,r0,0 /* NOP */
-   bdnz10b
-   subic.  r3,r3,1
-   bne 00b
-   blr
-
-.Ludelay_not_601:
mulli   r4,r3,1000  /* nanoseconds */
/*  Change r4 to be the number of ticks using:
 *  (nanoseconds + (timebase_period_ns - 1 )) / timebase_period_ns
diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index fdddb822d564..76ce0ffd8af0 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -294,8 +294,6 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTR_MAYBE_CAN_NAP  0
 #endif
 
-#define CPU_FTRS_PPC601(CPU_FTR_COMMON | \
-   CPU_FTR_COHERENT_ICACHE)
 #define CPU_FTRS_603   (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE | CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_604   (CPU_FTR_COMMON | CPU_FTR_PPC_LE)
@@ -511,10 +509,8 @@ static inline void cpu_feature_keys_init(void) { }
 #else
 enum {
CPU_FTRS_POSSIBLE =
-#ifdef CONFIG_PPC_BOOK3S_601
-   CPU_FTRS_PPC601 |
-#elif defined(CONFIG_PPC_BOOK3S_32)
-   CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
+#ifdef CONFIG_PPC_BOOK3S_32
+   CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 |
CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX |
CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 |
@@ -589,9 +585,7 @@ enum {
 #else
 enum {
CPU_FTRS_ALWAYS =
-#ifdef CONFIG_PPC_BOOK3S_601
-   CPU_FTRS_PPC601 &
-#elif defined(CONFIG_PPC_BOOK3S_32)
+#ifdef CONFIG_PPC_BOOK3S_32
CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 &
CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX &
diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index 67a421b81a50..511786f0e40d 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -401,8 +401,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, 
CPU_FTR_CELL_TB_BUG, 96)
 #define MFTBU(dest)mfspr dest, SPRN_TBRU
 #endif
 
-/* tlbsync is not implemented on 601 */
-#if !defined(CONFIG_SMP) || defined(CONFIG_PPC_BOOK3S_601)
+#ifndef CONFIG_SMP
 #define TLBSYNC
 #else
 #define TLBSYNCtlbsync; sync
diff --git a/arch/powerpc/include/asm/ptrace.h 
b/arch/powerpc/include/asm/ptrace.h
index 155a197c0aa1..e2c778c176a3 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -243,11 +243,7 @@ static inline void set_trap_norestart(struct pt_regs *regs)
 }
 
 #define arch_has_single_step() (1)
-#ifndef CONFIG_PPC_BOOK3S_601
 #define arch_has_block_step()  (true)
-#else

[PATCH 7/9] powerpc: Tidy up a bit after removal of PowerPC 601.

2020-08-13 Thread Christophe Leroy
The removal of the 601 left some standalone blocks from
former if/else. Drop the { } and re-indent.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/btext.c| 11 +++--
 arch/powerpc/mm/book3s32/mmu.c | 45 +++---
 2 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index b609fb39dba8..c22a8e0dbc93 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -95,13 +95,10 @@ void __init btext_prepare_BAT(void)
boot_text_mapped = 0;
return;
}
-   {
-   /* 603, 604, G3, G4, ... */
-   lowbits = addr & ~0xFF00UL;
-   addr &= 0xFF00UL;
-   disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
-   disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW); 
-   }
+   lowbits = addr & ~0xFF00UL;
+   addr &= 0xFF00UL;
+   disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
+   disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
logicalDisplayBase = (void *) (vaddr + lowbits);
 }
 #endif
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index f42b718ea971..16546ca4074e 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -73,16 +73,13 @@ unsigned long p_block_mapped(phys_addr_t pa)
 static int find_free_bat(void)
 {
int b;
+   int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
 
-   {
-   int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+   for (b = 0; b < n; b++) {
+   struct ppc_bat *bat = BATS[b];
 
-   for (b = 0; b < n; b++) {
-   struct ppc_bat *bat = BATS[b];
-
-   if (!(bat[1].batu & 3))
-   return b;
-   }
+   if (!(bat[1].batu & 3))
+   return b;
}
return -1;
 }
@@ -273,24 +270,22 @@ void __init setbat(int index, unsigned long virt, 
phys_addr_t phys,
flags &= ~_PAGE_COHERENT;
 
bl = (size >> 17) - 1;
-   {
-   /* Do DBAT first */
-   wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
-  | _PAGE_COHERENT | _PAGE_GUARDED);
-   wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
-   bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
-   bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
-   if (flags & _PAGE_USER)
-   bat[1].batu |= 1;   /* Vp = 1 */
-   if (flags & _PAGE_GUARDED) {
-   /* G bit must be zero in IBATs */
-   flags &= ~_PAGE_EXEC;
-   }
-   if (flags & _PAGE_EXEC)
-   bat[0] = bat[1];
-   else
-   bat[0].batu = bat[0].batl = 0;
+   /* Do DBAT first */
+   wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+  | _PAGE_COHERENT | _PAGE_GUARDED);
+   wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
+   bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+   bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+   if (flags & _PAGE_USER)
+   bat[1].batu |= 1;   /* Vp = 1 */
+   if (flags & _PAGE_GUARDED) {
+   /* G bit must be zero in IBATs */
+   flags &= ~_PAGE_EXEC;
}
+   if (flags & _PAGE_EXEC)
+   bat[0] = bat[1];
+   else
+   bat[0].batu = bat[0].batl = 0;
 
bat_addrs[index].start = virt;
bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
-- 
2.25.0



[PATCH 9/9] powerpc: Remove get_tb_or_rtc()

2020-08-13 Thread Christophe Leroy
601 is gone, get_tb_or_rtc() is equivalent to get_tb().

Replace the former with the latter.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/time.h | 5 -
 arch/powerpc/kernel/irq.c   | 2 +-
 arch/powerpc/kernel/time.c  | 6 +++---
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 37fa99f9783d..c904a8861fa6 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -104,11 +104,6 @@ static inline u64 get_tb(void)
 }
 #endif /* !CONFIG_PPC64 */
 
-static inline u64 get_tb_or_rtc(void)
-{
-   return get_tb();
-}
-
 static inline void set_tb(unsigned int upper, unsigned int lower)
 {
mtspr(SPRN_TBWL, 0);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bf21ebd36190..2d188f81ebdb 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -104,7 +104,7 @@ static inline notrace unsigned long get_irq_happened(void)
 
 static inline notrace int decrementer_check_overflow(void)
 {
-   u64 now = get_tb_or_rtc();
+   u64 now = get_tb();
u64 *next_tb = this_cpu_ptr(_next_tb);
  
return now >= *next_tb;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 13c820c15d37..760ea359a7f7 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -595,7 +595,7 @@ void timer_interrupt(struct pt_regs *regs)
irq_work_run();
}
 
-   now = get_tb_or_rtc();
+   now = get_tb();
if (now >= *next_tb) {
*next_tb = ~(u64)0;
if (evt->event_handler)
@@ -937,7 +937,7 @@ static void __init clocksource_init(void)
 static int decrementer_set_next_event(unsigned long evt,
  struct clock_event_device *dev)
 {
-   __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt);
+   __this_cpu_write(decrementers_next_tb, get_tb() + evt);
set_dec(evt);
 
/* We may have raced with new irq work */
@@ -1071,7 +1071,7 @@ void __init time_init(void)
tb_to_ns_scale = scale;
tb_to_ns_shift = shift;
/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
-   boot_tb = get_tb_or_rtc();
+   boot_tb = get_tb();
 
/* If platform provided a timezone (pmac), we correct the time */
if (timezone_offset) {
-- 
2.25.0



[PATCH 8/9] powerpc: Remove __USE_RTC()

2020-08-13 Thread Christophe Leroy
Now that PowerPC 601 is gone, __USE_RTC() is never true.

Remove it.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/time.h |  9 +-
 arch/powerpc/kernel/time.c  | 52 +
 2 files changed, 9 insertions(+), 52 deletions(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index ce065589192a..37fa99f9783d 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -38,9 +38,6 @@ struct div_result {
u64 result_low;
 };
 
-/* Accessor functions for the timebase (RTC on 601) registers. */
-#define __USE_RTC()(0)
-
 #ifdef CONFIG_PPC64
 
 /* For compatibility, get_tbl() is defined as get_tb() on ppc64 */
@@ -109,7 +106,7 @@ static inline u64 get_tb(void)
 
 static inline u64 get_tb_or_rtc(void)
 {
-   return __USE_RTC() ? get_rtc() : get_tb();
+   return get_tb();
 }
 
 static inline void set_tb(unsigned int upper, unsigned int lower)
@@ -153,10 +150,6 @@ static inline void set_dec(u64 val)
 
 static inline unsigned long tb_ticks_since(unsigned long tstamp)
 {
-   if (__USE_RTC()) {
-   int delta = get_rtcl() - (unsigned int) tstamp;
-   return delta < 0 ? delta + 10 : delta;
-   }
return get_tbl() - tstamp;
 }
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index f85539ebb513..13c820c15d37 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -75,15 +75,6 @@
 #include 
 #include 
 
-static u64 rtc_read(struct clocksource *);
-static struct clocksource clocksource_rtc = {
-   .name = "rtc",
-   .rating   = 400,
-   .flags= CLOCK_SOURCE_IS_CONTINUOUS,
-   .mask = CLOCKSOURCE_MASK(64),
-   .read = rtc_read,
-};
-
 static u64 timebase_read(struct clocksource *);
 static struct clocksource clocksource_timebase = {
.name = "timebase",
@@ -447,19 +438,9 @@ void vtime_flush(struct task_struct *tsk)
 void __delay(unsigned long loops)
 {
unsigned long start;
-   int diff;
 
spin_begin();
-   if (__USE_RTC()) {
-   start = get_rtcl();
-   do {
-   /* the RTCL register wraps at 10 */
-   diff = get_rtcl() - start;
-   if (diff < 0)
-   diff += 10;
-   spin_cpu_relax();
-   } while (diff < loops);
-   } else if (tb_invalid) {
+   if (tb_invalid) {
/*
 * TB is in error state and isn't ticking anymore.
 * HMI handler was unable to recover from TB error.
@@ -696,8 +677,6 @@ EXPORT_SYMBOL_GPL(tb_to_ns);
  */
 notrace unsigned long long sched_clock(void)
 {
-   if (__USE_RTC())
-   return get_rtc();
return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
 }
 
@@ -847,11 +826,6 @@ void read_persistent_clock64(struct timespec64 *ts)
 }
 
 /* clocksource code */
-static notrace u64 rtc_read(struct clocksource *cs)
-{
-   return (u64)get_rtc();
-}
-
 static notrace u64 timebase_read(struct clocksource *cs)
 {
return (u64)get_tb();
@@ -948,12 +922,7 @@ void update_vsyscall_tz(void)
 
 static void __init clocksource_init(void)
 {
-   struct clocksource *clock;
-
-   if (__USE_RTC())
-   clock = _rtc;
-   else
-   clock = _timebase;
+   struct clocksource *clock = _timebase;
 
if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
printk(KERN_ERR "clocksource: %s is already registered\n",
@@ -1071,17 +1040,12 @@ void __init time_init(void)
u64 scale;
unsigned shift;
 
-   if (__USE_RTC()) {
-   /* 601 processor: dec counts down by 128 every 128ns */
-   ppc_tb_freq = 10;
-   } else {
-   /* Normal PowerPC with timebase register */
-   ppc_md.calibrate_decr();
-   printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu 
MHz\n",
-  ppc_tb_freq / 100, ppc_tb_freq % 100);
-   printk(KERN_DEBUG "time_init: processor frequency   = %lu.%.6lu 
MHz\n",
-  ppc_proc_freq / 100, ppc_proc_freq % 100);
-   }
+   /* Normal PowerPC with timebase register */
+   ppc_md.calibrate_decr();
+   printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
+  ppc_tb_freq / 100, ppc_tb_freq % 100);
+   printk(KERN_DEBUG "time_init: processor frequency   = %lu.%.6lu MHz\n",
+  ppc_proc_freq / 100, ppc_proc_freq % 100);
 
tb_ticks_per_jiffy = ppc_tb_freq / HZ;
tb_ticks_per_sec = ppc_tb_freq;
-- 
2.25.0



[PATCH 4/9] powerpc: Drop SYNC_601() ISYNC_601() and SYNC()

2020-08-13 Thread Christophe Leroy
Those macros are now empty at all times. Drop them.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/ppc_asm.h  |  4 
 arch/powerpc/kernel/entry_32.S  | 17 +
 arch/powerpc/kernel/fpu.S   |  1 -
 arch/powerpc/kernel/head_32.S   |  9 -
 arch/powerpc/kernel/head_32.h   |  1 -
 arch/powerpc/kernel/l2cr_6xx.S  |  3 +--
 arch/powerpc/kernel/misc_32.S   |  1 -
 arch/powerpc/mm/book3s32/hash_low.S | 12 
 8 files changed, 2 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index 0b9dc814b81c..67a421b81a50 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -382,10 +382,6 @@ GLUE(.,name):
 #endif
 
 /* various errata or part fixups */
-#defineSYNC
-#define SYNC_601
-#define ISYNC_601
-
 #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
 #define MFTB(dest) \
 90:mfspr dest, SPRN_TBRL;  \
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index f4d0af8e1136..f25ea188ecd3 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -234,7 +234,6 @@ transfer_to_handler_cont:
mtspr   SPRN_SRR0,r11
mtspr   SPRN_SRR1,r10
mtlrr9
-   SYNC
RFI /* jump to handler, enable MMU */
 
 #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
@@ -264,7 +263,6 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr   SPRN_SRR0,r12
mtspr   SPRN_SRR1,r0
-   SYNC
RFI
 
 reenable_mmu:
@@ -323,7 +321,6 @@ stack_ovf:
 #endif
mtspr   SPRN_SRR0,r9
mtspr   SPRN_SRR1,r10
-   SYNC
RFI
 _ASM_NOKPROBE_SYMBOL(stack_ovf)
 #endif
@@ -411,7 +408,6 @@ ret_from_syscall:
/* disable interrupts so current_thread_info()->flags can't change */
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)  /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
-   SYNC
mtmsr   r10
lwz r9,TI_FLAGS(r2)
li  r8,-MAX_ERRNO
@@ -474,7 +470,6 @@ syscall_exit_finish:
 #endif
mtspr   SPRN_SRR0,r7
mtspr   SPRN_SRR1,r8
-   SYNC
RFI
 _ASM_NOKPROBE_SYMBOL(syscall_exit_finish)
 #ifdef CONFIG_44x
@@ -567,7 +562,6 @@ syscall_exit_work:
 * lockdep as we are supposed to have IRQs on at this point
 */
ori r10,r10,MSR_EE
-   SYNC
mtmsr   r10
 
/* Save NVGPRS if they're not saved already */
@@ -606,7 +600,6 @@ ret_from_kernel_syscall:
 #endif
mtspr   SPRN_SRR0, r9
mtspr   SPRN_SRR1, r10
-   SYNC
RFI
 _ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall)
 
@@ -810,7 +803,6 @@ fast_exception_return:
REST_GPR(9, r11)
REST_GPR(12, r11)
lwz r11,GPR11(r11)
-   SYNC
RFI
 _ASM_NOKPROBE_SYMBOL(fast_exception_return)
 
@@ -872,7 +864,6 @@ ret_from_except:
 * from the interrupt. */
/* Note: We don't bother telling lockdep about it */
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
-   SYNC/* Some chip revs have problems here... */
mtmsr   r10 /* disable interrupts */
 
lwz r3,_MSR(r1) /* Returning to user mode? */
@@ -1035,7 +1026,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
 * exc_exit_restart below.  -- paulus
 */
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
-   SYNC
mtmsr   r10 /* clear the RI bit */
.globl exc_exit_restart
 exc_exit_restart:
@@ -1046,7 +1036,6 @@ exc_exit_restart:
lwz r1,GPR1(r1)
.globl exc_exit_restart_end
 exc_exit_restart_end:
-   SYNC
RFI
 _ASM_NOKPROBE_SYMBOL(exc_exit_restart)
 _ASM_NOKPROBE_SYMBOL(exc_exit_restart_end)
@@ -1274,7 +1263,6 @@ do_resched:   /* r10 contains 
MSR_KERNEL here */
mfmsr   r10
 #endif
ori r10,r10,MSR_EE
-   SYNC
mtmsr   r10 /* hard-enable interrupts */
bl  schedule
 recheck:
@@ -1283,7 +1271,6 @@ recheck:
 * TI_FLAGS aren't advertised.
 */
LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
-   SYNC
mtmsr   r10 /* disable interrupts */
lwz r9,TI_FLAGS(r2)
andi.   r0,r9,_TIF_NEED_RESCHED
@@ -1292,7 +1279,6 @@ recheck:
beq restore_user
 do_user_signal:/* r10 contains MSR_KERNEL here */
ori r10,r10,MSR_EE
-   SYNC
mtmsr   r10 /* hard-enable interrupts */
/* save r13-r31 in the exception frame, if not already done */
lwz r3,_TRAP(r1)
@@ -1382,8 +1368,7 @@ _GLOBAL(enter_rtas)
mfmsr   r9
stw r9,8(r1)
LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
-   SYNC/* disable interrupts so 

[PATCH 2/9] powerpc: Remove SYNC on non 6xx

2020-08-13 Thread Christophe Leroy
SYNC is useful for PowerPC 601 only. On everything else,
SYNC is empty.

Remove it from code that is not made to run on 6xx.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_40x.S   | 1 -
 arch/powerpc/kernel/head_booke.h | 1 -
 arch/powerpc/kernel/misc_64.S| 1 -
 3 files changed, 3 deletions(-)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 5b282d9965a5..44c9018aed1b 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -72,7 +72,6 @@ turn_on_mmu:
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr   SPRN_SRR0,r0
-   SYNC
rfi /* enables MMU */
b   .   /* prevent prefetch past rfi */
 
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 18f87bf9e32b..71c359d438b5 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -176,7 +176,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 #endif
mtspr   SPRN_SRR1,r10
mtspr   SPRN_SRR0,r11
-   SYNC
RFI /* jump to handler, enable MMU */
 99:b   ret_from_kernel_syscall
 .endm
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 7bb46ad98207..070465825c21 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -365,7 +365,6 @@ _GLOBAL(kexec_smp_wait)
 
li  r4,KEXEC_STATE_REAL_MODE
stb r4,PACAKEXECSTATE(r13)
-   SYNC
 
b   kexec_wait
 
-- 
2.25.0



[PATCH 5/9] powerpc: Remove PowerPC 601

2020-08-13 Thread Christophe Leroy
Powerpc 601 is 25 years old.

It is not selected by any defconfig.

It requires a lot of special handling as it deviates from the
standard 6xx.

Retire it.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/cputable.c | 15 ---
 arch/powerpc/platforms/Kconfig.cputype | 11 ++-
 2 files changed, 2 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 3d406a9626e8..1338ed6e545b 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -592,21 +592,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 #ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_601
-   {   /* 601 */
-   .pvr_mask   = 0x,
-   .pvr_value  = 0x0001,
-   .cpu_name   = "601",
-   .cpu_features   = CPU_FTRS_PPC601,
-   .cpu_user_features  = COMMON_USER | PPC_FEATURE_601_INSTR |
-   PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB,
-   .mmu_features   = MMU_FTR_HPTE_TABLE,
-   .icache_bsize   = 32,
-   .dcache_bsize   = 32,
-   .machine_check  = machine_check_generic,
-   .platform   = "ppc601",
-   },
-#endif /* CONFIG_PPC_BOOK3S_601 */
 #ifdef CONFIG_PPC_BOOK3S_6xx
{   /* 603 */
.pvr_mask   = 0x,
diff --git a/arch/powerpc/platforms/Kconfig.cputype 
b/arch/powerpc/platforms/Kconfig.cputype
index 87737ec86d39..2b39589a6a8a 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -20,7 +20,7 @@ choice
depends on PPC32
help
  There are five families of 32 bit PowerPC chips supported.
- The most common ones are the desktop and server CPUs (601, 603,
+ The most common ones are the desktop and server CPUs (603,
  604, 740, 750, 74xx) CPUs from Freescale and IBM, with their
  embedded 512x/52xx/82xx/83xx/86xx counterparts.
  The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500
@@ -30,7 +30,7 @@ choice
  If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx.
 
 config PPC_BOOK3S_6xx
-   bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx except 601"
+   bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
select PPC_BOOK3S_32
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
@@ -38,13 +38,6 @@ config PPC_BOOK3S_6xx
select PPC_HAVE_KUAP
select HAVE_ARCH_VMAP_STACK
 
-config PPC_BOOK3S_601
-   bool "PowerPC 601"
-   select PPC_BOOK3S_32
-   select PPC_FPU
-   select PPC_HAVE_KUAP
-   select HAVE_ARCH_VMAP_STACK
-
 config PPC_85xx
bool "Freescale 85xx"
select E500
-- 
2.25.0



[PATCH 1/9] powerpc: Remove flush_instruction_cache for book3s/32

2020-08-13 Thread Christophe Leroy
The only callers of flush_instruction_cache() are:

arch/powerpc/kernel/swsusp_booke.S: bl flush_instruction_cache
arch/powerpc/mm/nohash/40x.c:   flush_instruction_cache();
arch/powerpc/mm/nohash/44x.c:   flush_instruction_cache();
arch/powerpc/mm/nohash/fsl_booke.c: flush_instruction_cache();
arch/powerpc/platforms/44x/machine_check.c: 
flush_instruction_cache();
arch/powerpc/platforms/44x/machine_check.c: 
flush_instruction_cache();

This function is not used by book3s/32, drop it.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/misc_32.S | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index b24f866fef81..bd870743c06f 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -271,9 +271,8 @@ _ASM_NOKPROBE_SYMBOL(real_writeb)
 
 /*
  * Flush instruction cache.
- * This is a no-op on the 601.
  */
-#ifndef CONFIG_PPC_8xx
+#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32)
 _GLOBAL(flush_instruction_cache)
 #if defined(CONFIG_4xx)
lis r3, KERNELBASE@h
@@ -290,18 +289,11 @@ _GLOBAL(flush_instruction_cache)
mfspr   r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr   SPRN_L1CSR1,r3
-#elif defined(CONFIG_PPC_BOOK3S_601)
-   blr /* for 601, do nothing */
-#else
-   /* 603/604 processor - use invalidate-all bit in HID0 */
-   mfspr   r3,SPRN_HID0
-   ori r3,r3,HID0_ICFI
-   mtspr   SPRN_HID0,r3
 #endif /* CONFIG_4xx */
isync
blr
 EXPORT_SYMBOL(flush_instruction_cache)
-#endif /* CONFIG_PPC_8xx */
+#endif /* CONFIG_PPC_8xx || CONFIG_PPC_BOOK3S_32 */
 
 /*
  * Copy a whole page.  We use the dcbz instruction on the destination
-- 
2.25.0



[PATCH 3/9] powerpc: Remove CONFIG_PPC601_SYNC_FIX

2020-08-13 Thread Christophe Leroy
This config option isn't in any defconfig.

The very first versions of Powerpc 601 have a bug which
requires additional sync before and/or after some instructions.

This was more than 25 years ago and time has come to retire
those buggy versions of the 601 from the kernel.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/ppc_asm.h |  6 --
 arch/powerpc/platforms/Kconfig | 15 ---
 2 files changed, 21 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index b4cc6608131c..0b9dc814b81c 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -382,15 +382,9 @@ GLUE(.,name):
 #endif
 
 /* various errata or part fixups */
-#ifdef CONFIG_PPC601_SYNC_FIX
-#define SYNC   sync; isync
-#define SYNC_601   sync
-#define ISYNC_601  isync
-#else
 #defineSYNC
 #define SYNC_601
 #define ISYNC_601
-#endif
 
 #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
 #define MFTB(dest) \
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index fb7515b4fa9c..f377a56ecc85 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -199,21 +199,6 @@ source "drivers/cpuidle/Kconfig"
 
 endmenu
 
-config PPC601_SYNC_FIX
-   bool "Workarounds for PPC601 bugs"
-   depends on PPC_BOOK3S_601 && PPC_PMAC
-   default y
-   help
- Some versions of the PPC601 (the first PowerPC chip) have bugs which
- mean that extra synchronization instructions are required near
- certain instructions, typically those that make major changes to the
- CPU state.  These extra instructions reduce performance slightly.
- If you say N here, these extra instructions will not be included,
- resulting in a kernel which will run faster but may not run at all
- on some systems with the PPC601 chip.
-
- If in doubt, say Y here.
-
 config TAU
bool "On-chip CPU temperature sensor support"
depends on PPC_BOOK3S_32
-- 
2.25.0



Re: [PATCH v2 3/4] powerpc/memhotplug: Make lmb size 64bit

2020-08-13 Thread Sasha Levin
Hi

[This is an automated email]

This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all

The bot has tested the following trees: v5.8, v5.7.14, v5.4.57, v4.19.138, 
v4.14.193, v4.9.232, v4.4.232.

v5.8: Build OK!
v5.7.14: Build OK!
v5.4.57: Build OK!
v4.19.138: Failed to apply! Possible dependencies:
Unable to calculate

v4.14.193: Failed to apply! Possible dependencies:
Unable to calculate

v4.9.232: Failed to apply! Possible dependencies:
1a367063ca0c ("powerpc/pseries: Check memory device state before 
onlining/offlining")
25b587fba9a4 ("powerpc/pseries: Correct possible read beyond dlpar sysfs 
buffer")
333f7b76865b ("powerpc/pseries: Implement indexed-count hotplug memory add")
753843471cbb ("powerpc/pseries: Implement indexed-count hotplug memory 
remove")
943db62c316c ("powerpc/pseries: Revert 'Auto-online hotplugged memory'")
c21f515c7436 ("powerpc/pseries: Make the acquire/release of the drc for 
memory a seperate step")
e70d59700fc3 ("powerpc/pseries: Introduce memory hotplug READD operation")
f84775c2d5d9 ("powerpc/pseries: Fix build break when MEMORY_HOTREMOVE=n")

v4.4.232: Failed to apply! Possible dependencies:
183deeea5871 ("powerpc/pseries: Consolidate CPU hotplug code to 
hotplug-cpu.c")
1a367063ca0c ("powerpc/pseries: Check memory device state before 
onlining/offlining")
1dc759566636 ("powerpc/pseries: Use kernel hotplug queue for PowerVM 
hotplug events")
1f859adb9253 ("powerpc/pseries: Verify CPU doesn't exist before adding")
25b587fba9a4 ("powerpc/pseries: Correct possible read beyond dlpar sysfs 
buffer")
333f7b76865b ("powerpc/pseries: Implement indexed-count hotplug memory add")
4a4bdfea7cb7 ("powerpc/pseries: Refactor dlpar_add_lmb() code")
753843471cbb ("powerpc/pseries: Implement indexed-count hotplug memory 
remove")
9054619ef54a ("powerpc/pseries: Add pseries hotplug workqueue")
943db62c316c ("powerpc/pseries: Revert 'Auto-online hotplugged memory'")
9dc512819e4b ("powerpc: Fix unused function warning 'lmb_to_memblock'")
bdf5fc633804 ("powerpc/pseries: Update LMB associativity index during DLPAR 
add/remove")
c21f515c7436 ("powerpc/pseries: Make the acquire/release of the drc for 
memory a seperate step")
e70d59700fc3 ("powerpc/pseries: Introduce memory hotplug READD operation")
e9d764f80396 ("powerpc/pseries: Enable kernel CPU dlpar from sysfs")
ec999072442a ("powerpc/pseries: Auto-online hotplugged memory")
f84775c2d5d9 ("powerpc/pseries: Fix build break when MEMORY_HOTREMOVE=n")
fdb4f6e99ffa ("powerpc/pseries: Remove call to memblock_add()")


NOTE: The patch will not be queued to stable trees until it is upstream.

How should we proceed with this patch?

-- 
Thanks
Sasha


[PATCH] powerpc/book3s64/radix: Fix boot failure with large amount of guest memory

2020-08-13 Thread Aneesh Kumar K.V
If the hypervisor doesn't support hugepages, the kernel ends up allocating a 
large
number of page table pages. The early page table allocation was wrongly
setting the max memblock limit to ppc64_rma_size with radix translation
which resulted in boot failure as shown below.

Kernel panic - not syncing:
early_alloc_pgtable: Failed to allocate 16777216 bytes align=0x100 nid=-1 
from=0x max_addr=0x
 CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-24.9-default+ #2
 Call Trace:
 [c16f3d00] [c07c6470] dump_stack+0xc4/0x114 (unreliable)
 [c16f3d40] [c014c78c] panic+0x164/0x418
 [c16f3dd0] [c0098890] early_alloc_pgtable+0xe0/0xec
 [c16f3e60] [c10a5440] radix__early_init_mmu+0x360/0x4b4
 [c16f3ef0] [c1099bac] early_init_mmu+0x1c/0x3c
 [c16f3f10] [c109a320] early_setup+0x134/0x170

This was because the kernel was checking for the radix feature before we enable 
the
feature via mmu_features. This resulted in the kernel using hash restrictions on
radix.

Rework the early init code such that the kernel boot with memblock restrictions
as imposed by hash. At that point, the kernel still hasn't finalized the
translation the kernel will end up using.

We have three different ways of detecting radix.

1. dt_cpu_ftrs_scan -> used only in case of PowerNV
2. ibm,pa-features -> Used when we don't use cpu_dt_ftr_scan
3. CAS -> Where we negotiate with hypervisor about the supported translation.

We look at 1 or 2 early in the boot and after that, we look at the CAS vector to
finalize the translation the kernel will use. We also support a kernel command
line option (disable_radix) to switch to hash.

Update the memblock limit after mmu_early_init_devtree() if the kernel is going
to use radix translation. This forces some of the memblock allocations we do 
before
mmu_early_init_devtree() to be within the RMA limit.

Fixes: 2bfd65e45e87 ("powerpc/mm/radix: Add radix callbacks for early init 
routines")
Reported-by: Shirisha Ganta 
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 8 +---
 arch/powerpc/kernel/prom.c   | 6 ++
 arch/powerpc/mm/book3s64/radix_pgtable.c | 2 ++
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h 
b/arch/powerpc/include/asm/book3s/64/mmu.h
index 55442d45c597..4245f99453f5 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -244,9 +244,11 @@ extern void radix__setup_initial_memory_limit(phys_addr_t 
first_memblock_base,
 static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
  phys_addr_t first_memblock_size)
 {
-   if (early_radix_enabled())
-   return radix__setup_initial_memory_limit(first_memblock_base,
-  first_memblock_size);
+   /*
+* Hash has more strict restrictions. At this point we don't
+* know which translations we will pick. Hence got with hash
+* restrictions.
+*/
return hash__setup_initial_memory_limit(first_memblock_base,
   first_memblock_size);
 }
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index d8a2fb87ba0c..340900ae95a4 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -811,6 +811,12 @@ void __init early_init_devtree(void *params)
 
mmu_early_init_devtree();
 
+   /*
+* Reset ppc64_rma_size and memblock memory limit
+*/
+   if (early_radix_enabled())
+   radix__setup_initial_memory_limit(memstart_addr, 
first_memblock_size);
+
 #ifdef CONFIG_PPC_POWERNV
/* Scan and build the list of machine check recoverable ranges */
of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL);
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 28c784976bed..094daf16acac 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -747,6 +747,8 @@ void radix__setup_initial_memory_limit(phys_addr_t 
first_memblock_base,
 * Radix mode is not limited by RMA / VRMA addressing.
 */
ppc64_rma_size = ULONG_MAX;
+
+   memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-- 
2.26.2



Re: [PATCH] sfc_ef100: Fix build failure on powerpc

2020-08-13 Thread Segher Boessenkool
On Thu, Aug 13, 2020 at 02:39:10PM +, Christophe Leroy wrote:
> ppc6xx_defconfig fails building sfc.ko module, complaining
> about the lack of _umoddi3 symbol.
> 
> This is due to the following test
> 
>   if (EFX_MIN_DMAQ_SIZE % reader->value) {
> 
> Because reader->value is u64.
> 
> As EFX_MIN_DMAQ_SIZE value is 512, reader->value is obviously small
> enough for an u32 calculation, so cast it as (u32) for the test, to
> avoid the need for _umoddi3.

That isn't the same e.g. if reader->value is 2**32 + small.  Which
probably cannot happen, but :-)


Segher


Re: linux-next: runtime warning in Linus' tree

2020-08-13 Thread Johannes Weiner
On Thu, Aug 13, 2020 at 04:46:54PM +1000, Stephen Rothwell wrote:
> [0.055220][T0] WARNING: CPU: 0 PID: 0 at mm/memcontrol.c:5220 
> mem_cgroup_css_alloc+0x350/0x904

> [The line numbers in the final linux next are 5226 and 5141 due to
> later patches.]
> 
> Introduced (or exposed) by commit
> 
>   3e38e0aaca9e ("mm: memcg: charge memcg percpu memory to the parent cgroup")
> 
> This commit actually adds the WARN_ON, so it either adds the bug that
> sets it off, or the bug already existed.
> 
> Unfotunately, the version of this patch in linux-next up tuntil today
> is different.  :-(

Sorry, I made a last-minute request to include these checks in that
patch to make the code a bit more robust, but they trigger a false
positive here. Let's remove them.

---

>From de8ea7c96c056c3cbe7b93995029986a158fb9cd Mon Sep 17 00:00:00 2001
From: Johannes Weiner 
Date: Thu, 13 Aug 2020 10:40:54 -0400
Subject: [PATCH] mm: memcontrol: fix warning when allocating the root cgroup

Commit 3e38e0aaca9e ("mm: memcg: charge memcg percpu memory to the
parent cgroup") adds memory tracking to the memcg kernel structures
themselves to make cgroups liable for the memory they are consuming
through the allocation of child groups (which can be significant).

This code is a bit awkward as it's spread out through several
functions: The outermost function does memalloc_use_memcg(parent) to
set up current->active_memcg, which designates which cgroup to charge,
and the inner functions pass GFP_ACCOUNT to request charging for
specific allocations. To make sure this dependency is satisfied at all
times - to make sure we don't randomly charge whoever is calling the
functions - the inner functions warn on !current->active_memcg.

However, this triggers a false warning when the root memcg itself is
allocated. No parent exists in this case, and so current->active_memcg
is rightfully NULL. It's a false positive, not indicative of a bug.

Delete the warnings for now, we can revisit this later.

Fixes: 3e38e0aaca9e ("mm: memcg: charge memcg percpu memory to the parent 
cgroup")
Signed-off-by: Johannes Weiner 
---
 mm/memcontrol.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d59fd9af6e63..9d87082e64aa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5137,9 +5137,6 @@ static int alloc_mem_cgroup_per_node_info(struct 
mem_cgroup *memcg, int node)
if (!pn)
return 1;
 
-   /* We charge the parent cgroup, never the current task */
-   WARN_ON_ONCE(!current->active_memcg);
-
pn->lruvec_stat_local = alloc_percpu_gfp(struct lruvec_stat,
 GFP_KERNEL_ACCOUNT);
if (!pn->lruvec_stat_local) {
@@ -5222,9 +5219,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
goto fail;
}
 
-   /* We charge the parent cgroup, never the current task */
-   WARN_ON_ONCE(!current->active_memcg);
-
memcg->vmstats_local = alloc_percpu_gfp(struct memcg_vmstats_percpu,
GFP_KERNEL_ACCOUNT);
if (!memcg->vmstats_local)
-- 
2.28.0



[PATCH v3] powerpc/pseries: explicitly reschedule during drmem_lmb list traversal

2020-08-13 Thread Nathan Lynch
The drmem lmb list can have hundreds of thousands of entries, and
unfortunately lookups take the form of linear searches. As long as
this is the case, traversals have the potential to monopolize the CPU
and provoke lockup reports, workqueue stalls, and the like unless
they explicitly yield.

Rather than placing cond_resched() calls within various
for_each_drmem_lmb() loop blocks in the code, put it in the iteration
expression of the loop macro itself so users can't omit it.

Introduce a drmem_lmb_next() iteration helper function which calls
cond_resched() at a regular interval during array traversal. Each
iteration of the loop in DLPAR code paths can involve around ten RTAS
calls which can each take up to 250us, so this ensures the check is
performed at worst every few milliseconds.

Fixes: 6c6ea53725b3 ("powerpc/mm: Separate ibm, dynamic-memory data from DT 
format")
Signed-off-by: Nathan Lynch 
---

Notes:
Changes since v2:
* Make drmem_lmb_next() more general.
* Adjust reschedule interval for better code generation.
* Add commentary to drmem_lmb_next() to explain the cond_resched()
  call.
* Remove bounds assertions.

Changes since v1:
* Add bounds assertions in drmem_lmb_next().
* Call cond_resched() in the iterator on only every 20th element
  instead of on every iteration, to reduce overhead in tight loops.

 arch/powerpc/include/asm/drmem.h | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 17ccc6474ab6..6fb928605ed1 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -8,6 +8,8 @@
 #ifndef _ASM_POWERPC_LMB_H
 #define _ASM_POWERPC_LMB_H
 
+#include 
+
 struct drmem_lmb {
u64 base_addr;
u32 drc_index;
@@ -26,8 +28,22 @@ struct drmem_lmb_info {
 
 extern struct drmem_lmb_info *drmem_info;
 
+static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb,
+  const struct drmem_lmb *start)
+{
+   /*
+* DLPAR code paths can take several milliseconds per element
+* when interacting with firmware. Ensure that we don't
+* unfairly monopolize the CPU.
+*/
+   if (((++lmb - start) % 16) == 0)
+   cond_resched();
+
+   return lmb;
+}
+
 #define for_each_drmem_lmb_in_range(lmb, start, end)   \
-   for ((lmb) = (start); (lmb) < (end); (lmb)++)
+   for ((lmb) = (start); (lmb) < (end); lmb = drmem_lmb_next(lmb, start))
 
 #define for_each_drmem_lmb(lmb)\
for_each_drmem_lmb_in_range((lmb),  \
-- 
2.25.4



[PATCH] sfc_ef100: Fix build failure on powerpc

2020-08-13 Thread Christophe Leroy
ppc6xx_defconfig fails building sfc.ko module, complaining
about the lack of _umoddi3 symbol.

This is due to the following test

if (EFX_MIN_DMAQ_SIZE % reader->value) {

Because reader->value is u64.

As EFX_MIN_DMAQ_SIZE value is 512, reader->value is obviously small
enough for an u32 calculation, so cast it as (u32) for the test, to
avoid the need for _umoddi3.

Fixes: adcfc3482fff ("sfc_ef100: read Design Parameters at probe time")
Signed-off-by: Christophe Leroy 
---
 drivers/net/ethernet/sfc/ef100_nic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/ef100_nic.c 
b/drivers/net/ethernet/sfc/ef100_nic.c
index 36598d0542ed..234400b69b07 100644
--- a/drivers/net/ethernet/sfc/ef100_nic.c
+++ b/drivers/net/ethernet/sfc/ef100_nic.c
@@ -979,7 +979,7 @@ static int ef100_process_design_param(struct efx_nic *efx,
 * EFX_MIN_DMAQ_SIZE is divisible by GRANULARITY.
 * This is very unlikely to fail.
 */
-   if (EFX_MIN_DMAQ_SIZE % reader->value) {
+   if (EFX_MIN_DMAQ_SIZE % (u32)reader->value) {
netif_err(efx, probe, efx->net_dev,
  "%s size granularity is %llu, can't guarantee 
safety\n",
  reader->type == 
ESE_EF100_DP_GZ_RXQ_SIZE_GRANULARITY ? "RXQ" : "TXQ",
-- 
2.25.0



Re: [PATCH] powerpc/papr_scm: Limit the readability of 'perf_stats' sysfs attribute

2020-08-13 Thread Aneesh Kumar K.V

On 8/13/20 10:04 AM, Vaibhav Jain wrote:

The newly introduced 'perf_stats' attribute uses the default access
mode of 0444 letting non-root users access performance stats of an
nvdimm and potentially force the kernel into issuing large number of
expensive HCALLs. Since the information exposed by this attribute
cannot be cached, it's better to ward off access to this attribute
from users who don't need these performance statistics.

Hence this patch adds check in perf_stats_show() to only let users
that are 'perfmon_capable()' to read the nvdimm performance
statistics.

Fixes: 2d02bf835e573 ('powerpc/papr_scm: Fetch nvdimm performance stats from 
PHYP')
Reported-by: Aneesh Kumar K.V 
Signed-off-by: Vaibhav Jain 
---
  arch/powerpc/platforms/pseries/papr_scm.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
b/arch/powerpc/platforms/pseries/papr_scm.c
index f439f0dfea7d1..36c51bf8af9a8 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -792,6 +792,10 @@ static ssize_t perf_stats_show(struct device *dev,
struct nvdimm *dimm = to_nvdimm(dev);
struct papr_scm_priv *p = nvdimm_provider_data(dimm);
  
+	/* Allow access only to perfmon capable users */

+   if (!perfmon_capable())
+   return -EACCES;
+


An access check is usually done in open(). This is the read callback IIUC.


if (!p->stat_buffer_len)
return -ENOENT;
  



-aneesh


Re: [PATCH 1/5] powerpc: Remove flush_instruction_cache for book3s/32

2020-08-13 Thread Christoph Hellwig
On Thu, Aug 13, 2020 at 01:13:08PM +0100, Christoph Hellwig wrote:
> On Thu, Aug 13, 2020 at 10:12:00AM +, Christophe Leroy wrote:
> > -#ifndef CONFIG_PPC_8xx
> > +#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32)
> >  _GLOBAL(flush_instruction_cache)
> >  #if defined(CONFIG_4xx)
> > lis r3, KERNELBASE@h
> > @@ -290,18 +289,11 @@ _GLOBAL(flush_instruction_cache)
> > mfspr   r3,SPRN_L1CSR1
> > ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
> > mtspr   SPRN_L1CSR1,r3
> > -#elif defined(CONFIG_PPC_BOOK3S_601)
> > -   blr /* for 601, do nothing */
> > -#else
> > -   /* 603/604 processor - use invalidate-all bit in HID0 */
> > -   mfspr   r3,SPRN_HID0
> > -   ori r3,r3,HID0_ICFI
> > -   mtspr   SPRN_HID0,r3
> >  #endif /* CONFIG_4xx */
> > isync
> > blr
> >  EXPORT_SYMBOL(flush_instruction_cache)
> > -#endif /* CONFIG_PPC_8xx */
> > +#endif /* CONFIG_PPC_8xx || CONFIG_PPC_BOOK3S_32 */
> 
> What about untangling this into entirely separate versions instead
> of the ifdef mess?  Also the export does not seem to be needed at all.

Ok, I see that you do that later, sorry.


Re: [PATCH 1/5] powerpc: Remove flush_instruction_cache for book3s/32

2020-08-13 Thread Christoph Hellwig
On Thu, Aug 13, 2020 at 10:12:00AM +, Christophe Leroy wrote:
> -#ifndef CONFIG_PPC_8xx
> +#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32)
>  _GLOBAL(flush_instruction_cache)
>  #if defined(CONFIG_4xx)
>   lis r3, KERNELBASE@h
> @@ -290,18 +289,11 @@ _GLOBAL(flush_instruction_cache)
>   mfspr   r3,SPRN_L1CSR1
>   ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
>   mtspr   SPRN_L1CSR1,r3
> -#elif defined(CONFIG_PPC_BOOK3S_601)
> - blr /* for 601, do nothing */
> -#else
> - /* 603/604 processor - use invalidate-all bit in HID0 */
> - mfspr   r3,SPRN_HID0
> - ori r3,r3,HID0_ICFI
> - mtspr   SPRN_HID0,r3
>  #endif /* CONFIG_4xx */
>   isync
>   blr
>  EXPORT_SYMBOL(flush_instruction_cache)
> -#endif /* CONFIG_PPC_8xx */
> +#endif /* CONFIG_PPC_8xx || CONFIG_PPC_BOOK3S_32 */

What about untangling this into entirely separate versions instead
of the ifdef mess?  Also the export does not seem to be needed at all.


[PATCH 5/5] powerpc: Rewrite 4xx flush_cache_instruction() in C

2020-08-13 Thread Christophe Leroy
Nothing prevents flush_cache_instruction() from being written in C.

Do it to improve readability and maintainability.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/misc_32.S   | 13 -
 arch/powerpc/mm/nohash/4xx.c| 15 +++
 arch/powerpc/mm/nohash/Makefile |  1 +
 3 files changed, 16 insertions(+), 13 deletions(-)
 create mode 100644 arch/powerpc/mm/nohash/4xx.c

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 4f4a31d9fdd0..87717966f5cd 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -268,19 +268,6 @@ _ASM_NOKPROBE_SYMBOL(real_writeb)
 
 #endif /* CONFIG_40x */
 
-
-/*
- * Flush instruction cache.
- */
-#ifdef CONFIG_4xx
-_GLOBAL(flush_instruction_cache)
-   lis r3, KERNELBASE@h
-   iccci   0,r3
-   isync
-   blr
-EXPORT_SYMBOL(flush_instruction_cache)
-#endif
-
 /*
  * Copy a whole page.  We use the dcbz instruction on the destination
  * to reduce memory traffic (it eliminates the unnecessary reads of
diff --git a/arch/powerpc/mm/nohash/4xx.c b/arch/powerpc/mm/nohash/4xx.c
new file mode 100644
index ..954c8aa42a32
--- /dev/null
+++ b/arch/powerpc/mm/nohash/4xx.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ */
+
+#include 
+#include 
+#include 
+
+void flush_instruction_cache(void)
+{
+   iccci((void*)KERNELBASE);
+   isync();
+}
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
index 0424f6ce5bd8..a7f7211b6373 100644
--- a/arch/powerpc/mm/nohash/Makefile
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
 
 obj-y  += mmu_context.o tlb.o tlb_low.o
 obj-$(CONFIG_PPC_BOOK3E_64)+= tlb_low_64e.o book3e_pgtable.o
+obj-$(CONFIG_4xx)  += 4xx.o
 obj-$(CONFIG_40x)  += 40x.o
 obj-$(CONFIG_44x)  += 44x.o
 obj-$(CONFIG_PPC_8xx)  += 8xx.o
-- 
2.25.0



[PATCH 4/5] powerpc: Rewrite FSL_BOOKE flush_cache_instruction() in C

2020-08-13 Thread Christophe Leroy
Nothing prevents flush_cache_instruction() from being written in C.

Do it to improve readability and maintainability.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/misc_32.S  | 17 -
 arch/powerpc/mm/nohash/fsl_booke.c | 16 
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index a8f6ef513115..4f4a31d9fdd0 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -281,23 +281,6 @@ _GLOBAL(flush_instruction_cache)
 EXPORT_SYMBOL(flush_instruction_cache)
 #endif
 
-#ifdef CONFIG_FSL_BOOKE
-_GLOBAL(flush_instruction_cache)
-#ifdef CONFIG_E200
-   mfspr   r3,SPRN_L1CSR0
-   ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
-   /* msync; isync recommended here */
-   mtspr   SPRN_L1CSR0,r3
-#else
-   mfspr   r3,SPRN_L1CSR1
-   ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
-   mtspr   SPRN_L1CSR1,r3
-#endif
-   isync
-   blr
-EXPORT_SYMBOL(flush_instruction_cache)
-#endif
-
 /*
  * Copy a whole page.  We use the dcbz instruction on the destination
  * to reduce memory traffic (it eliminates the unnecessary reads of
diff --git a/arch/powerpc/mm/nohash/fsl_booke.c 
b/arch/powerpc/mm/nohash/fsl_booke.c
index 0c294827d6e5..36bda962d3b3 100644
--- a/arch/powerpc/mm/nohash/fsl_booke.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -219,6 +219,22 @@ unsigned long __init mmu_mapin_ram(unsigned long base, 
unsigned long top)
return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
 }
 
+void flush_instruction_cache(void)
+{
+   unsigned long tmp;
+
+   if (IS_ENABLED(CONFIG_E200)) {
+   tmp = mfspr(SPRN_L1CSR0);
+   tmp |= L1CSR0_CFI | L1CSR0_CLFC;
+   mtspr(SPRN_L1CSR0, tmp);
+   } else {
+   tmp = mfspr(SPRN_L1CSR1);
+   tmp |= L1CSR1_ICFI | L1CSR1_ICLFR;
+   mtspr(SPRN_L1CSR1, tmp);
+   }
+   isync();
+}
+
 /*
  * MMU_init_hw does the chip-specific initialization of the MMU hardware.
  */
-- 
2.25.0



[PATCH 3/5] powerpc: Remove flush_instruction_cache() on 8xx

2020-08-13 Thread Christophe Leroy
flush_instruction_cache() is never used on 8xx, remove it.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/nohash/8xx.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index d2b37146ae6c..231ca95f9ffb 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -244,13 +244,6 @@ void set_context(unsigned long id, pgd_t *pgd)
mb();
 }
 
-void flush_instruction_cache(void)
-{
-   isync();
-   mtspr(SPRN_IC_CST, IDC_INVALL);
-   isync();
-}
-
 #ifdef CONFIG_PPC_KUEP
 void __init setup_kuep(bool disabled)
 {
-- 
2.25.0



[PATCH 2/5] powerpc: Untangle flush_instruction_cache()

2020-08-13 Thread Christophe Leroy
flush_instruction_cache() is a mixup of each PPC32 sub-arch.

Untangle it by making one complete function for each sub-arch.

This makes it a lot more readable and maintainable.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/misc_32.S | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index bd870743c06f..a8f6ef513115 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -272,28 +272,31 @@ _ASM_NOKPROBE_SYMBOL(real_writeb)
 /*
  * Flush instruction cache.
  */
-#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32)
+#ifdef CONFIG_4xx
 _GLOBAL(flush_instruction_cache)
-#if defined(CONFIG_4xx)
lis r3, KERNELBASE@h
iccci   0,r3
-#elif defined(CONFIG_FSL_BOOKE)
+   isync
+   blr
+EXPORT_SYMBOL(flush_instruction_cache)
+#endif
+
+#ifdef CONFIG_FSL_BOOKE
+_GLOBAL(flush_instruction_cache)
 #ifdef CONFIG_E200
mfspr   r3,SPRN_L1CSR0
ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
/* msync; isync recommended here */
mtspr   SPRN_L1CSR0,r3
-   isync
-   blr
-#endif
+#else
mfspr   r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr   SPRN_L1CSR1,r3
-#endif /* CONFIG_4xx */
+#endif
isync
blr
 EXPORT_SYMBOL(flush_instruction_cache)
-#endif /* CONFIG_PPC_8xx || CONFIG_PPC_BOOK3S_32 */
+#endif
 
 /*
  * Copy a whole page.  We use the dcbz instruction on the destination
-- 
2.25.0



[PATCH 1/5] powerpc: Remove flush_instruction_cache for book3s/32

2020-08-13 Thread Christophe Leroy
The only callers of flush_instruction_cache() are:

arch/powerpc/kernel/swsusp_booke.S: bl flush_instruction_cache
arch/powerpc/mm/nohash/40x.c:   flush_instruction_cache();
arch/powerpc/mm/nohash/44x.c:   flush_instruction_cache();
arch/powerpc/mm/nohash/fsl_booke.c: flush_instruction_cache();
arch/powerpc/platforms/44x/machine_check.c: 
flush_instruction_cache();
arch/powerpc/platforms/44x/machine_check.c: 
flush_instruction_cache();

This function is not used by book3s/32, drop it.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/misc_32.S | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index b24f866fef81..bd870743c06f 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -271,9 +271,8 @@ _ASM_NOKPROBE_SYMBOL(real_writeb)
 
 /*
  * Flush instruction cache.
- * This is a no-op on the 601.
  */
-#ifndef CONFIG_PPC_8xx
+#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC_BOOK3S_32)
 _GLOBAL(flush_instruction_cache)
 #if defined(CONFIG_4xx)
lis r3, KERNELBASE@h
@@ -290,18 +289,11 @@ _GLOBAL(flush_instruction_cache)
mfspr   r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr   SPRN_L1CSR1,r3
-#elif defined(CONFIG_PPC_BOOK3S_601)
-   blr /* for 601, do nothing */
-#else
-   /* 603/604 processor - use invalidate-all bit in HID0 */
-   mfspr   r3,SPRN_HID0
-   ori r3,r3,HID0_ICFI
-   mtspr   SPRN_HID0,r3
 #endif /* CONFIG_4xx */
isync
blr
 EXPORT_SYMBOL(flush_instruction_cache)
-#endif /* CONFIG_PPC_8xx */
+#endif /* CONFIG_PPC_8xx || CONFIG_PPC_BOOK3S_32 */
 
 /*
  * Copy a whole page.  We use the dcbz instruction on the destination
-- 
2.25.0



[PATCH] powerpc: Drop _nmask_and_or_msr()

2020-08-13 Thread Christophe Leroy
_nmask_and_or_msr() is only used at two places to set MSR_IP.

The SYNC is unnecessary as the users are not PowerPC 601.

Can be easily written in C.

Do it, and drop _nmask_and_or_msr()

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/misc_32.S | 13 -
 arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c |  3 ++-
 arch/powerpc/platforms/embedded6xx/storcenter.c   |  3 ++-
 3 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index b24f866fef81..8d9cb5df580e 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -215,19 +215,6 @@ _GLOBAL(low_choose_7447a_dfs)
 
 #endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */
 
-/*
- * complement mask on the msr then "or" some values on.
- * _nmask_and_or_msr(nmask, value_to_or)
- */
-_GLOBAL(_nmask_and_or_msr)
-   mfmsr   r0  /* Get current msr */
-   andcr0,r0,r3/* And off the bits set in r3 (first parm) */
-   or  r0,r0,r4/* Or on the bits in r4 (second parm) */
-   SYNC/* Some chip revs have problems here... */
-   mtmsr   r0  /* Update machine state */
-   isync
-   blr /* Done */
-
 #ifdef CONFIG_40x
 
 /*
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c 
b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 15437abe1f6d..b95c3380d2b5 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -147,7 +147,8 @@ static void __noreturn mpc7448_hpc2_restart(char *cmd)
local_irq_disable();
 
/* Set exception prefix high - to the firmware */
-   _nmask_and_or_msr(0, MSR_IP);
+   mtmsr(mfmsr() | MSR_IP);
+   isync();
 
for (;;) ;  /* Spin until reset happens */
 }
diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c 
b/arch/powerpc/platforms/embedded6xx/storcenter.c
index ed1914dd34bb..e346ddcef45e 100644
--- a/arch/powerpc/platforms/embedded6xx/storcenter.c
+++ b/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -101,7 +101,8 @@ static void __noreturn storcenter_restart(char *cmd)
local_irq_disable();
 
/* Set exception prefix high - to the firmware */
-   _nmask_and_or_msr(0, MSR_IP);
+   mtmsr(mfmsr() | MSR_IP);
+   isync();
 
/* Wait for reset to happen */
for (;;) ;
-- 
2.25.0



Re: [PATCH 13/16] debug_vm_pgtable/pmd_clear: Don't use pmd/pud_clear on pte entries

2020-08-13 Thread Aneesh Kumar K.V

On 8/13/20 10:57 AM, Anshuman Khandual wrote:



On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:

pmd_clear() should not be used to clear pmd level pte entries.


Could you please elaborate on this. The proposed change set does
not match the description here.



pmd_clear is implemented such that we don't use it to clear a huge pte 
entry. We use pmdp_huge_get_and_clear() for that. Hence we have a check in 
pmd_clear which adds a WARN if we find _PAGE_PTE set on the entry.


In the test we follow a hugepmd usage with a pmd_clear. We should 
instead at the end of the advanced pmd test use pmdp_huge_get_and_clear().






Signed-off-by: Aneesh Kumar K.V 
---
  mm/debug_vm_pgtable.c | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 061c19bba7f0..529892b9be2f 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -191,6 +191,8 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
pmd = READ_ONCE(*pmdp);
WARN_ON(pmd_young(pmd));
  
+	/*  Clear the pte entries  */

+   pmdp_huge_get_and_clear(mm, vaddr, pmdp);
pgtable = pgtable_trans_huge_withdraw(mm, pmdp);
  }
  
@@ -313,6 +315,8 @@ static void __init pud_advanced_tests(struct mm_struct *mm,

pudp_test_and_clear_young(vma, vaddr, pudp);
pud = READ_ONCE(*pudp);
WARN_ON(pud_young(pud));
+
+   pudp_huge_get_and_clear(mm, vaddr, pudp);
  }
  
  static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot)

@@ -431,8 +435,6 @@ static void __init pud_populate_tests(struct mm_struct *mm, 
pud_t *pudp,
 * This entry points to next level page table page.
 * Hence this must not qualify as pud_bad().
 */
-   pmd_clear(pmdp);
-   pud_clear(pudp);


Both entires are cleared before creating a fresh page table entry.
Why that is a problem.


pud_populate(mm, pudp, pmdp);
pud = READ_ONCE(*pudp);
WARN_ON(pud_bad(pud));
@@ -564,7 +566,6 @@ static void __init pmd_populate_tests(struct mm_struct *mm, 
pmd_t *pmdp,
 * This entry points to next level page table page.
 * Hence this must not qualify as pmd_bad().
 */
-   pmd_clear(pmdp);


Ditto.


pmd_populate(mm, pmdp, pgtable);
pmd = READ_ONCE(*pmdp);
WARN_ON(pmd_bad(pmd));





linux-next: runtime warning in Linus' tree

2020-08-13 Thread Stephen Rothwell
Hi all,

Testing Linus' tree today, my qemu runs (PowerPC
powerpc_pseries_le_defconfig) produce the following WARNING:

[0.021401][T0] Mount-cache hash table entries: 8192 (order: 0, 65536 
bytes, linear)
[0.021529][T0] Mountpoint-cache hash table entries: 8192 (order: 0, 
65536 bytes, linear)
[0.053969][T0] [ cut here ]
[0.055220][T0] WARNING: CPU: 0 PID: 0 at mm/memcontrol.c:5220 
mem_cgroup_css_alloc+0x350/0x904
[0.055355][T0] Modules linked in:
[0.055812][T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.8.0 #5
[0.055976][T0] NIP:  c0410010 LR: c040fd68 CTR: 

[0.056097][T0] REGS: c11e7ab0 TRAP: 0700   Not tainted  (5.8.0)
[0.056162][T0] MSR:  82029033   CR: 
24000888  XER: 
[0.056449][T0] CFAR: c040fd80 IRQMASK: 0 
[0.056449][T0] GPR00: c040fd68 c11e7d40 
c11e8300 0001 
[0.056449][T0] GPR04: 0228  
0001  
[0.056449][T0] GPR08: c0007d003208  
 c0007d002fe8 
[0.056449][T0] GPR12: 0001 c13d 
 011dd528 
[0.056449][T0] GPR16: 011dd840 011dd690 
0018 0003 
[0.056449][T0] GPR20: 0001 c10cbcf8 
0003 c10cd540 
[0.056449][T0] GPR24: c10e8778 c10e9080 
c10cbcd8  
[0.056449][T0] GPR28:  c0007e2a1000 
c10cbcc8 c118ea00 
[0.057109][T0] NIP [c0410010] mem_cgroup_css_alloc+0x350/0x904
[0.057177][T0] LR [c040fd68] mem_cgroup_css_alloc+0xa8/0x904
[0.057394][T0] Call Trace:
[0.057534][T0] [c11e7d40] [c040fd68] 
mem_cgroup_css_alloc+0xa8/0x904 (unreliable)
[0.057814][T0] [c11e7dc0] [c0f5b13c] 
cgroup_init_subsys+0xbc/0x210
[0.057903][T0] [c11e7e10] [c0f5b690] 
cgroup_init+0x220/0x598
[0.057973][T0] [c11e7ee0] [c0f34354] 
start_kernel+0x67c/0x6ec
[0.058047][T0] [c11e7f90] [c000cb88] 
start_here_common+0x1c/0x614
[0.058241][T0] Instruction dump:
[0.058420][T0] eac10030 eae10038 eb410050 eb610058 4b60 6000 
6000 6000 
[0.058550][T0] 3be00100 4bfffdfc 6000 6000 <0fe0> 4bfffd70 
6000 6000 
[0.059381][T0] ---[ end trace cb2d79b4994ef1fe ]---
[0.059810][T0] [ cut here ]
[0.059872][T0] WARNING: CPU: 0 PID: 0 at mm/memcontrol.c:5135 
mem_cgroup_css_alloc+0x750/0x904
[0.059930][T0] Modules linked in:
[0.060053][T0] CPU: 0 PID: 0 Comm: swapper/0 Tainted: GW
 5.8.0 #5
[0.060113][T0] NIP:  c0410410 LR: c040ff2c CTR: 

[0.060171][T0] REGS: c11e7ab0 TRAP: 0700   Tainted: GW  
(5.8.0)
[0.060229][T0] MSR:  82029033   CR: 
24000880  XER: 
[0.060332][T0] CFAR: c040fe48 IRQMASK: 0 
[0.060332][T0] GPR00: c040ff2c c11e7d40 
c11e8300 c0007e234c00 
[0.060332][T0] GPR04:   
c0007e235000 0013 
[0.060332][T0] GPR08: 7ec0  
 0001 
[0.060332][T0] GPR12:  c13d 
 011dd528 
[0.060332][T0] GPR16: 011dd840 011dd690 
0018 0003 
[0.060332][T0] GPR20: c1223300 c0e95900 
c118ea00 c12232c0 
[0.060332][T0] GPR24: c10e8778 c10e9080 
00400cc0  
[0.060332][T0] GPR28:  c0007e2a1000 
c0007e234c00  
[0.060855][T0] NIP [c0410410] mem_cgroup_css_alloc+0x750/0x904
[0.060911][T0] LR [c040ff2c] mem_cgroup_css_alloc+0x26c/0x904
[0.060958][T0] Call Trace:
[0.061003][T0] [c11e7d40] [c040ff2c] 
mem_cgroup_css_alloc+0x26c/0x904 (unreliable)
[0.061081][T0] [c11e7dc0] [c0f5b13c] 
cgroup_init_subsys+0xbc/0x210
[0.061165][T0] [c11e7e10] [c0f5b690] 
cgroup_init+0x220/0x598
[0.061233][T0] [c11e7ee0] [c0f34354] 
start_kernel+0x67c/0x6ec
[0.061303][T0] [c11e7f90] [c000cb88] 
start_here_common+0x1c/0x614
[0.061364][T0] Instruction dump:
[0.061408][T0] ebe1fff8 7c0803a6 4e800020 6000 6000 3d220004 
e929d230 7c3c4800 
[0.061508][T0] 41820190 e93c03d2 4bfffc80 6000 <0fe0> 4bfffa38 
6000 6000 
[0.061630][T0] ---[ end trace 

Re: [PATCH 10/16] debug_vm_pgtable/thp: Use page table depost/withdraw with THP

2020-08-13 Thread Aneesh Kumar K.V

On 8/13/20 10:55 AM, Anshuman Khandual wrote:

On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:

Architectures like ppc64 use deposited page table while updating the huge pte
entries.

Signed-off-by: Aneesh Kumar K.V 
---
  mm/debug_vm_pgtable.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 644d28861ce9..48475d288df1 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -147,7 +147,7 @@ static void __init pmd_basic_tests(unsigned long pfn, 
pgprot_t prot)
  static void __init pmd_advanced_tests(struct mm_struct *mm,
  struct vm_area_struct *vma, pmd_t *pmdp,
  unsigned long pfn, unsigned long vaddr,
- pgprot_t prot)
+ pgprot_t prot, pgtable_t pgtable)
  {
pmd_t pmd;
  
@@ -158,6 +158,8 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,

/* Align the address wrt HPAGE_PMD_SIZE */
vaddr = (vaddr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE;
  
+	pgtable_trans_huge_deposit(mm, pmdp, pgtable);

+
pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
set_pmd_at(mm, vaddr, pmdp, pmd);
pmdp_set_wrprotect(mm, vaddr, pmdp);
@@ -188,6 +190,8 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
pmdp_test_and_clear_young(vma, vaddr, pmdp);
pmd = READ_ONCE(*pmdp);
WARN_ON(pmd_young(pmd));
+
+   pgtable = pgtable_trans_huge_withdraw(mm, pmdp);
  }
  
  static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)

@@ -1002,7 +1006,7 @@ static int __init debug_vm_pgtable(void)
pgd_clear_tests(mm, pgdp);
  
  	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);

-   pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot);
+   pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot, saved_ptep);
pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
  



Makes sense, if it is required for THP to work correctly but needs to be tested
across enabled platforms. Why should not the same apply for pud_advanced_tests()
on platforms that supports PUD based THP.




pud doesn't have page table deposit/withdraw semantics. We use that to 
support hugepage split. With pud mapping we don't split, we just drop 
the hugepage and expect it to be faulted back in as regular page.


-aneesh


Re: [PATCH 16/16] debug_vm_pgtable/ppc64: Add a variant of pfn_pte/pmd

2020-08-13 Thread Aneesh Kumar K.V

On 8/13/20 11:00 AM, Anshuman Khandual wrote:


On 08/12/2020 12:03 PM, Aneesh Kumar K.V wrote:

The tests do expect _PAGE_PTE bit set by different page table accessors.
This is not true for the kernel. Within the kernel, _PAGE_PTE bits are
usually set by set_pte_at(). To make the below tests work correctly add test
specific pfn_pte/pmd helpers that set _PAGE_PTE bit.

pte_t pte = pfn_pte(pfn, prot);
WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte;

Signed-off-by: Aneesh Kumar K.V 
---
  mm/debug_vm_pgtable.c | 65 +++
  1 file changed, 41 insertions(+), 24 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index eea62d5e503b..153c925b5273 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -31,6 +31,23 @@
  #include 
  #include 
  
+#ifdef CONFIG_PPC_BOOK3S_64

+static inline pte_t debug_vm_pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+   pte_t pte = pfn_pte(pfn, pgprot);
+   return __pte(pte_val(pte) | _PAGE_PTE);
+
+}
+static inline pmd_t debug_vm_pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+   pmd_t pmd = pfn_pmd(pfn, pgprot);
+   return __pmd(pmd_val(pmd) | _PAGE_PTE);
+}
+#else
+#define debug_vm_pfn_pte(pfn, pgprot) pfn_pte(pfn, pgprot)
+#define debug_vm_pfn_pmd(pfn, pgprot) pfn_pmd(pfn, pgprot)
+#endif


Again, no platform specific constructs please. This defeats the whole purpose of
this test. If __PAGE_PTE is required for the helpers, then pfn_pmd/pte() could
be modified to accommodate that. We dont see similar issues on other platforms,
hence could you please explain why ppc64 is different here.



It is not platform specific. set_pte_at is the one that sets the 
_PAGE_PTE bit. We don't call that in the test.  The test seems to make 
the assumption that pfn_pte returns a proper pte, which is not true.


-aneesh


Re: INFO: task hung in pipe_release (2)

2020-08-13 Thread syzbot
syzbot has bisected this issue to:

commit fddb5d430ad9fa91b49b1d34d0202ffe2fa0e179
Author: Aleksa Sarai 
Date:   Sat Jan 18 12:07:59 2020 +

open: introduce openat2(2) syscall

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=164e716a90
start commit:   6ba1b005 Merge tag 'asm-generic-fixes-5.8' of git://git.ke..
git tree:   upstream
final oops: https://syzkaller.appspot.com/x/report.txt?x=154e716a90
console output: https://syzkaller.appspot.com/x/log.txt?x=114e716a90
kernel config:  https://syzkaller.appspot.com/x/.config?x=84f076779e989e69
dashboard link: https://syzkaller.appspot.com/bug?extid=61acc40a49a3e46e25ea
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=142ae22490

Reported-by: syzbot+61acc40a49a3e46e2...@syzkaller.appspotmail.com
Fixes: fddb5d430ad9 ("open: introduce openat2(2) syscall")

For information about bisection process see: https://goo.gl/tpsmEJ#bisection