[Bug 215217] Kernel fails to boot at an early stage when built with GCC_PLUGIN_LATENT_ENTROPY=y (PowerMac G4 3,6)

2021-12-17 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=215217

--- Comment #14 from Erhard F. (erhar...@mailbox.org) ---
(In reply to Christophe Leroy from comment #13)
> arch/powerpc/lib/feature-fixups.o also needs DISABLE_LATENT_ENTROPY_PLUGIN,
> see extract from your vmlinux below
I can confirm this works, thanks!

I need

arch/powerpc/kernel/Makefile: 
CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
arch/powerpc/lib/Makefile:
CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)

to get it booting on my G4 with GCC_PLUGIN_LATENT_ENTROPY=y. Modifying
setup_32.o is not needed.


[PATCH] powerpc: use swap() to make code cleaner

2021-12-17 Thread davidcomponentone
From: Yang Guang 

Use the macro 'swap()' defined in 'include/linux/minmax.h' to avoid
open-coding it.
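
For reference, swap() in include/linux/minmax.h boils down to:

	#define swap(a, b) \
		do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)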

Reported-by: Zeal Robot 
Signed-off-by: David Yang 
Signed-off-by: Yang Guang 
---
 arch/powerpc/platforms/powermac/pic.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/pic.c 
b/arch/powerpc/platforms/powermac/pic.c
index 4921bccf0376..75d8d7ec53db 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -311,11 +311,8 @@ static void __init pmac_pic_probe_oldstyle(void)
 
/* Check ordering of master & slave */
if (of_device_is_compatible(master, "gatwick")) {
-   struct device_node *tmp;
BUG_ON(slave == NULL);
-   tmp = master;
-   master = slave;
-   slave = tmp;
+   swap(master, slave);
}
 
/* We found a slave */
-- 
2.30.2



Re: [PATCH/RFC] mm: add and use batched version of __tlb_remove_table()

2021-12-17 Thread Peter Zijlstra
On Fri, Dec 17, 2021 at 11:19:10AM +0300, Nikita Yushchenko wrote:
> When batched page table freeing via struct mmu_table_batch is used, the
> final freeing in __tlb_remove_table_free() executes a loop, calling
> arch hook __tlb_remove_table() to free each table individually.
> 
> Shift that loop down to archs. This allows archs to optimize it, by
> freeing multiple tables in a single release_pages() call. This is
> faster than individual put_page() calls, especially with memcg
> accounting enabled.
> 
> Signed-off-by: Andrey Ryabinin 
> Signed-off-by: Nikita Yushchenko 
> ---
>  arch/arm/include/asm/tlb.h   |  5 
>  arch/arm64/include/asm/tlb.h |  5 
>  arch/powerpc/include/asm/book3s/32/pgalloc.h |  8 +++
>  arch/powerpc/include/asm/book3s/64/pgalloc.h |  1 +
>  arch/powerpc/include/asm/nohash/pgalloc.h|  8 +++
>  arch/powerpc/mm/book3s64/pgtable.c   |  8 +++
>  arch/s390/include/asm/tlb.h  |  1 +
>  arch/s390/mm/pgalloc.c   |  8 +++
>  arch/sparc/include/asm/pgalloc_64.h  |  8 +++
>  arch/x86/include/asm/tlb.h   |  5 
>  include/asm-generic/tlb.h|  2 +-
>  include/linux/swap.h |  5 +++-
>  mm/mmu_gather.c  |  6 +
>  mm/swap_state.c  | 24 +++-
>  14 files changed, 81 insertions(+), 13 deletions(-)

Oh gawd, that's terrible. Never, ever duplicate code like that.

I'm thinking the below does the same? But yes, please do as Dave said,
give us actual numbers that show this is worth it.

---
 arch/Kconfig |  4 
 arch/arm/Kconfig |  1 +
 arch/arm/include/asm/tlb.h   |  5 -
 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/tlb.h |  5 -
 arch/x86/Kconfig |  1 +
 arch/x86/include/asm/tlb.h   |  4 
 mm/mmu_gather.c  | 22 +++---
 8 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 26b8ed11639d..f2bd3f5af2b1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -415,6 +415,10 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
 config MMU_GATHER_TABLE_FREE
bool
 
+config MMU_GATHER_TABLE_PAGE
+   bool
+   depends on MMU_GATHER_TABLE_FREE
+
 config MMU_GATHER_RCU_TABLE_FREE
bool
select MMU_GATHER_TABLE_FREE
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index f0f9e8bec83a..11baaa5719c2 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -110,6 +110,7 @@ config ARM
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
+   select MMU_GATHER_TABLE_PAGE if MMU
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
select HAVE_STACKPROTECTOR
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index b8cbe03ad260..9d9b21649ca0 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -29,11 +29,6 @@
 #include 
 #include 
 
-static inline void __tlb_remove_table(void *_table)
-{
-   free_page_and_swap_cache((struct page *)_table);
-}
-
 #include 
 
 static inline void
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c4207cf9bb17..4aa28fb03f4f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -196,6 +196,7 @@ config ARM64
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUTEX_CMPXCHG if FUTEX
select MMU_GATHER_RCU_TABLE_FREE
+   select MMU_GATHER_TABLE_PAGE
select HAVE_RSEQ
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index c995d1f4594f..401826260a5c 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -11,11 +11,6 @@
 #include 
 #include 
 
-static inline void __tlb_remove_table(void *_table)
-{
-   free_page_and_swap_cache((struct page *)_table);
-}
-
 #define tlb_flush tlb_flush
 static void tlb_flush(struct mmu_gather *tlb);
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b9281fab4e3e..a22e653f4d0e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -235,6 +235,7 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
	select MMU_GATHER_RCU_TABLE_FREE	if PARAVIRT
+   select MMU_GATHER_TABLE_PAGE
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
	select HAVE_RELIABLE_STACKTRACE		if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 1bfe979bb9bc..dec5ffa3042a 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -32,9 +32,5 @@ static inline void tlb_flush(struct mmu_gather *tlb)
  * below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in 
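
(The mm/mmu_gather.c hunk is cut off above. The idea, as a rough sketch
rather than the actual hunk: batch the frees whenever the architecture
guarantees that page tables are full pages.)

	static void __tlb_remove_table_free(struct mmu_table_batch *batch)
	{
	#ifdef CONFIG_MMU_GATHER_TABLE_PAGE
		/* All tables are full pages: free them in one batch. */
		free_pages_and_swap_cache((struct page **)batch->tables, batch->nr);
	#else
		int i;

		for (i = 0; i < batch->nr; i++)
			__tlb_remove_table(batch->tables[i]);
	#endif
		free_page((unsigned long)batch);
	}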

Re: [patch V3 28/35] PCI/MSI: Simplify pci_irq_get_affinity()

2021-12-17 Thread Nathan Chancellor
Hi Thomas,

On Fri, Dec 10, 2021 at 11:19:26PM +0100, Thomas Gleixner wrote:
> From: Thomas Gleixner 
> 
> Replace open coded MSI descriptor chasing and use the proper accessor
> functions instead.
> 
> Signed-off-by: Thomas Gleixner 
> Reviewed-by: Greg Kroah-Hartman 
> Reviewed-by: Jason Gunthorpe 

Apologies if this has already been reported somewhere else or already
fixed; I searched all of lore and did not see anything similar to it,
nor any new commits in -tip around this.

I just bisected a boot failure on my AMD test desktop to this patch as
commit f48235900182 ("PCI/MSI: Simplify pci_irq_get_affinity()") in
-next. It looks like there is a problem with the NVMe drive after this
change according to the logs. Given that the hard drive is not getting
mounted for journald to write logs to, I am not really sure how to get
them from the machine so I have at least taken a picture of what I see
on my screen; open to ideas on that front!

https://github.com/nathanchance/bug-files/blob/0d25d78b5bc1d5e9c15192b3bc80676364de8287/f48235900182/crash.jpg

Please let me know what information I can provide to make debugging this
easier and I am more than happy to apply and test patches as needed.

Cheers,
Nathan


[PATCH] powerpc: dts: Remove "spidev" nodes

2021-12-17 Thread Rob Herring
"spidev" is not a real device, but a Linux implementation detail. It has
never been documented either. The kernel has WARNed on the use of it for
over 6 years. Time to remove its usage from the tree.

Cc: Mark Brown 
Signed-off-by: Rob Herring 
---
 arch/powerpc/boot/dts/digsy_mtc.dts | 8 
 arch/powerpc/boot/dts/o2d.dtsi  | 6 --
 2 files changed, 14 deletions(-)

diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts 
b/arch/powerpc/boot/dts/digsy_mtc.dts
index 57024a4c1e7d..dfaf974c0ce6 100644
--- a/arch/powerpc/boot/dts/digsy_mtc.dts
+++ b/arch/powerpc/boot/dts/digsy_mtc.dts
@@ -25,14 +25,6 @@ rtc@800 {
status = "disabled";
};
 
-   spi@f00 {
-   msp430@0 {
-   compatible = "spidev";
-   spi-max-frequency = <32000>;
-   reg = <0>;
-   };
-   };
-
psc@2000 {  // PSC1
status = "disabled";
};
diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi
index b55a9e5bd828..7e52509fa506 100644
--- a/arch/powerpc/boot/dts/o2d.dtsi
+++ b/arch/powerpc/boot/dts/o2d.dtsi
@@ -34,12 +34,6 @@ psc@2000 {   // PSC1
#address-cells = <1>;
#size-cells = <0>;
cell-index = <0>;
-
-   spidev@0 {
-   compatible = "spidev";
-   spi-max-frequency = <25>;
-   reg = <0>;
-   };
};
 
psc@2200 {  // PSC2
-- 
2.32.0



[PATCH] powerpc/mpic: Use bitmap_zalloc() when applicable

2021-12-17 Thread Christophe JAILLET
'mpic->protected' is a bitmap. So use 'bitmap_zalloc()' to simplify the
code and improve the semantics, instead of hand-writing the allocation.
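
For context, bitmap_zalloc() pairs with bitmap_free(); a minimal usage
sketch (hypothetical, not part of this patch):

	#include <linux/bitmap.h>
	#include <linux/bitops.h>
	#include <linux/errno.h>
	#include <linux/gfp.h>

	static int example(unsigned int nbits)
	{
		/* Zero-initialised bitmap, one bit per source: */
		unsigned long *map = bitmap_zalloc(nbits, GFP_KERNEL);
		int set;

		if (!map)
			return -ENOMEM;

		__set_bit(3, map);	/* mark source 3 */
		set = test_bit(3, map);	/* reads back as 1 */

		bitmap_free(map);	/* pairs with bitmap_zalloc() */
		return set;
	}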

Signed-off-by: Christophe JAILLET 
---
 arch/powerpc/sysdev/mpic.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 995fb2ada507..626ba4a9f64f 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1323,8 +1323,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
	psrc = of_get_property(mpic->node, "protected-sources", &psize);
if (psrc) {
/* Allocate a bitmap with one bit per interrupt */
-   unsigned int mapsize = BITS_TO_LONGS(intvec_top + 1);
-   mpic->protected = kcalloc(mapsize, sizeof(long), GFP_KERNEL);
+   mpic->protected = bitmap_zalloc(intvec_top + 1, GFP_KERNEL);
BUG_ON(mpic->protected == NULL);
for (i = 0; i < psize/sizeof(u32); i++) {
if (psrc[i] > intvec_top)
-- 
2.30.2



Re: [PATCH/RFC] mm: add and use batched version of __tlb_remove_table()

2021-12-17 Thread Sam Ravnborg
Hi Nikita,

How about adding the following to tlb.h:

#ifndef __tlb_remove_tables
static inline void __tlb_remove_tables(void **tables, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		__tlb_remove_table(tables[i]);
}
#endif


And then the few archs that want to override __tlb_remove_tables
need to do a

#define __tlb_remove_tables __tlb_remove_tables
static inline void __tlb_remove_tables(void **tables, int nr)
{
...
}

In this way, the archs that use the default implementation need not do
anything.
A few functions already use this pattern in tlb.h - see for example
tlb_start_vma.
io.h is another file where you can see the same pattern.

Sam


Re: [PATCH/RFC] mm: add and use batched version of __tlb_remove_table()

2021-12-17 Thread Dave Hansen
On 12/17/21 12:19 AM, Nikita Yushchenko wrote:
> When batched page table freeing via struct mmu_table_batch is used, the
> final freeing in __tlb_remove_table_free() executes a loop, calling
> arch hook __tlb_remove_table() to free each table individually.
> 
> Shift that loop down to archs. This allows archs to optimize it, by
> freeing multiple tables in a single release_pages() call. This is
> faster than individual put_page() calls, especially with memcg
> accounting enabled.

Could we quantify "faster"?  There's a non-trivial amount of code being
added here and it would be nice to back it up with some cold, hard numbers.

> --- a/mm/mmu_gather.c
> +++ b/mm/mmu_gather.c
> @@ -95,11 +95,7 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct 
> page *page, int page_
>  
>  static void __tlb_remove_table_free(struct mmu_table_batch *batch)
>  {
> - int i;
> -
> - for (i = 0; i < batch->nr; i++)
> - __tlb_remove_table(batch->tables[i]);
> -
> + __tlb_remove_tables(batch->tables, batch->nr);
>   free_page((unsigned long)batch);
>  }

This leaves a single call-site for __tlb_remove_table():

> static void tlb_remove_table_one(void *table)
> {
> tlb_remove_table_sync_one();
> __tlb_remove_table(table);
> }

Is that worth it, or could it just be:

__tlb_remove_tables(&table, 1);

?

> -void free_pages_and_swap_cache(struct page **pages, int nr)
> +static void __free_pages_and_swap_cache(struct page **pages, int nr,
> + bool do_lru)
>  {
> - struct page **pagep = pages;
>   int i;
>  
> - lru_add_drain();
> + if (do_lru)
> + lru_add_drain();
>   for (i = 0; i < nr; i++)
> - free_swap_cache(pagep[i]);
> - release_pages(pagep, nr);
> + free_swap_cache(pages[i]);
> + release_pages(pages, nr);
> +}
> +
> +void free_pages_and_swap_cache(struct page **pages, int nr)
> +{
> + __free_pages_and_swap_cache(pages, nr, true);
> +}
> +
> +void free_pages_and_swap_cache_nolru(struct page **pages, int nr)
> +{
> + __free_pages_and_swap_cache(pages, nr, false);
>  }

This went unmentioned in the changelog.  But, it seems like there's a
specific optimization here.  In the exiting code,
free_pages_and_swap_cache() is wasteful if no page in pages[] is on the
LRU.  It doesn't need the lru_add_drain().

Any code that knows it is freeing all non-LRU pages can call
free_pages_and_swap_cache_nolru() which should perform better than
free_pages_and_swap_cache().

Should we add this to the for loop in __free_pages_and_swap_cache()?

	for (i = 0; i < nr; i++) {
		if (!do_lru)
			VM_WARN_ON_ONCE_PAGE(PageLRU(pages[i]),
					     pages[i]);
		free_swap_cache(pages[i]);
	}

But, even more than that, do all the architectures even need the
free_swap_cache()?  PageSwapCache() will always be false on x86, which
makes the loop kinda silly.  x86 could, for instance, just do:

static inline void __tlb_remove_tables(void **tables, int nr)
{
release_pages((struct page **)tables, nr);
}

I _think_ this will work everywhere that has whole pages as page tables.
Taking that one step further, what if we only had one generic:

static inline void tlb_remove_tables(void **tables, int nr)
{
#ifdef ARCH_PAGE_TABLES_ARE_FULL_PAGE
	release_pages((struct page **)tables, nr);
#else
	arch_tlb_remove_tables(tables, nr);
#endif
}

Architectures that set ARCH_PAGE_TABLES_ARE_FULL_PAGE (or whatever)
don't need to implement __tlb_remove_table() at all *and* can do
release_pages() directly.

This avoids all the confusion with the swap cache and LRU naming.


Re: [PATCH v3 11/12] lkdtm: Fix execute_[user]_location()

2021-12-17 Thread Helge Deller
On 12/17/21 12:49, Christophe Leroy wrote:
> Hi Kees,
>
> On 17/10/2021 at 14:38, Christophe Leroy wrote:
>> execute_location() and execute_user_location() intend
>> to copy do_nothing() text and execute it at a new location.
>> However, at the time being they don't copy the do_nothing() text
>> but the do_nothing() function descriptor, which still points to the
>> original text. So in the end do_nothing() is still executed at
>> its original location, although using a copied function descriptor.
>>
>> So fix that by really copying the do_nothing() text and building a new
>> function descriptor: copy the do_nothing() function descriptor and
>> update the target address to the new location.
>>
>> Also fix the displayed addresses by dereferencing the do_nothing()
>> function descriptor.
>>
>> Signed-off-by: Christophe Leroy 
>
> Do you have any comment to this patch and to patch 12 ?
>
> If not, is it ok to get your acked-by ?


Hi Christophe,

I think this whole series is a nice cleanup and harmonization
of how function descriptors are used.

At least for the PA-RISC parts you may add:
Acked-by: Helge Deller 

Thanks!
Helge

>
>> ---
>>   drivers/misc/lkdtm/perms.c | 37 -
>>   1 file changed, 28 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
>> index 035fcca441f0..1cf24c4a79e9 100644
>> --- a/drivers/misc/lkdtm/perms.c
>> +++ b/drivers/misc/lkdtm/perms.c
>> @@ -44,19 +44,34 @@ static noinline void do_overwritten(void)
>>  return;
>>   }
>>
>> +static void *setup_function_descriptor(func_desc_t *fdesc, void *dst)
>> +{
>> +if (!have_function_descriptors())
>> +return dst;
>> +
>> +memcpy(fdesc, do_nothing, sizeof(*fdesc));
>> +fdesc->addr = (unsigned long)dst;
>> +barrier();
>> +
>> +return fdesc;
>> +}
>> +
>>   static noinline void execute_location(void *dst, bool write)
>>   {
>> -void (*func)(void) = dst;
>> +void (*func)(void);
>> +func_desc_t fdesc;
>> +void *do_nothing_text = dereference_function_descriptor(do_nothing);
>>
>> -pr_info("attempting ok execution at %px\n", do_nothing);
>> +pr_info("attempting ok execution at %px\n", do_nothing_text);
>>  do_nothing();
>>
>>  if (write == CODE_WRITE) {
>> -memcpy(dst, do_nothing, EXEC_SIZE);
>> +memcpy(dst, do_nothing_text, EXEC_SIZE);
>>  flush_icache_range((unsigned long)dst,
>> (unsigned long)dst + EXEC_SIZE);
>>  }
>> -pr_info("attempting bad execution at %px\n", func);
>> +pr_info("attempting bad execution at %px\n", dst);
>> +func = setup_function_descriptor(&fdesc, dst);
>>  func();
>>  pr_err("FAIL: func returned\n");
>>   }
>> @@ -66,16 +81,19 @@ static void execute_user_location(void *dst)
>>  int copied;
>>
>>  /* Intentionally crossing kernel/user memory boundary. */
>> -void (*func)(void) = dst;
>> +void (*func)(void);
>> +func_desc_t fdesc;
>> +void *do_nothing_text = dereference_function_descriptor(do_nothing);
>>
>> -pr_info("attempting ok execution at %px\n", do_nothing);
>> +pr_info("attempting ok execution at %px\n", do_nothing_text);
>>  do_nothing();
>>
>> -copied = access_process_vm(current, (unsigned long)dst, do_nothing,
>> +copied = access_process_vm(current, (unsigned long)dst, do_nothing_text,
>> EXEC_SIZE, FOLL_WRITE);
>>  if (copied < EXEC_SIZE)
>>  return;
>> -pr_info("attempting bad execution at %px\n", func);
>> +pr_info("attempting bad execution at %px\n", dst);
>> +func = setup_function_descriptor(&fdesc, dst);
>>  func();
>>  pr_err("FAIL: func returned\n");
>>   }
>> @@ -153,7 +171,8 @@ void lkdtm_EXEC_VMALLOC(void)
>>
>>   void lkdtm_EXEC_RODATA(void)
>>   {
>> -execute_location(lkdtm_rodata_do_nothing, CODE_AS_IS);
>> +execute_location(dereference_function_descriptor(lkdtm_rodata_do_nothing),
>> + CODE_AS_IS);
>>   }
>>
>>   void lkdtm_EXEC_USERSPACE(void)



Re: linux-next: manual merge of the audit tree with the powerpc tree

2021-12-17 Thread Christophe Leroy


On 17/12/2021 at 00:04, Paul Moore wrote:
> On Thu, Dec 16, 2021 at 4:08 AM Christophe Leroy
>  wrote:
>> Thanks Cédric, I've now been able to install debian PPC32 port of DEBIAN
>> 11 on QEMU and run the tests.
>>
>> I followed instructions in file README.md provided in the test suite.
>> I also modified tests/Makefile to force MODE := 32
>>
>> I've got a lot of failures, am I missing some options in the kernel or
>> something ?
>>
>> Running as   userroot
>>   with context root:::
>>   on   system
> 
> While SELinux is not required for audit, I don't think I've ever run
> it on a system without SELinux.  In theory the audit-testsuite shouldn't
> rely on SELinux being present (other than the SELinux specific tests
> of course), but I'm not confident enough to say that the test suite
> will run without problem without SELinux.
> 
> If it isn't too difficult, I would suggest enabling SELinux in your
> kernel build and ensuring the necessary userspace, policy, etc. is
> installed.  You don't need to worry about getting it all running
> correctly; the audit-testsuite should pass with SELinux in permissive
> mode.
> 
> If you're still seeing all these failures after trying that let us know.
> 

Still the same it seems:

Running as   userroot
 with context unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
 on   system

# Test 3 got: "256" (backlog_wait_time_actual_reset/test at line 151)
#   Expected: "0"
#  backlog_wait_time_actual_reset/test line 151 is: ok( $result, 0 ); 
  # Was an event found?
# Test 4 got: "0" (backlog_wait_time_actual_reset/test at line 168)
#   Expected: "1"
#  backlog_wait_time_actual_reset/test line 168 is: ok( $found_msg, 1 ); 
# Was the message well-formed?
# Failed test 5 in backlog_wait_time_actual_reset/test at line 169
#  backlog_wait_time_actual_reset/test line 169 is: ok( $reset_rc == 
$reset_msg )
backlog_wait_time_actual_reset/test ..
Failed 3/5 subtests
sh: 1: Syntax error: Bad fd number
sh: 1: Syntax error: Bad fd number
exec_execve/test . ok
sh: 1: Syntax error: Bad fd number
sh: 1: Syntax error: Bad fd number
# Failed test 7 in exec_name/test at line 145 fail #4
#  exec_name/test line 145 is: ok( $found[$_] == $expected[$_] );
sh: 1: Syntax error: Bad fd number
# Failed test 11 in exec_name/test at line 145 fail #7
sh: 1: Syntax error: Bad fd number
# Failed test 15 in exec_name/test at line 145 fail #10
# Failed test 17 in exec_name/test at line 145 fail #12
sh: 1: Syntax error: Bad fd number
# Failed test 19 in exec_name/test at line 145 fail #13
sh: 1: Syntax error: Bad fd number
# Failed test 23 in exec_name/test at line 145 fail #16
# Failed test 24 in exec_name/test at line 145 fail #17
sh: 1: Syntax error: Bad fd number
Error sending add rule data request (Rule exists)
# Failed test 29 in exec_name/test at line 145 fail #21
sh: 1: Syntax error: Bad fd number
exec_name/test ...
Failed 8/29 subtests
sh: 1: Syntax error: Bad fd number
# Failed test 2 in file_create/test at line 121
#  file_create/test line 121 is: ok($found_syscall);
# Failed test 3 in file_create/test at line 122
#  file_create/test line 122 is: ok($found_parent);
# Failed test 4 in file_create/test at line 123
#  file_create/test line 123 is: ok($found_create);
sh: 1: Syntax error: Bad fd number
file_create/test .
Failed 3/4 subtests
sh: 1: Syntax error: Bad fd number
# Failed test 2 in file_delete/test at line 122
#  file_delete/test line 122 is: ok($found_syscall);
# Failed test 3 in file_delete/test at line 123
#  file_delete/test line 123 is: ok($found_parent);
# Failed test 4 in file_delete/test at line 124
#  file_delete/test line 124 is: ok($found_delete);
sh: 1: Syntax error: Bad fd number
file_delete/test .
Failed 3/4 subtests
sh: 1: Syntax error: Bad fd number
# Failed test 2 in file_rename/test at line 138
#  file_rename/test line 138 is: ok($found_syscall);
# Test 3 got: "0" (file_rename/test at line 139)
#   Expected: "2"
#  file_rename/test line 139 is: ok( $found_parent, 2 );
# Failed test 4 in file_rename/test at line 140
#  file_rename/test line 140 is: ok($found_create);
# Failed test 5 in file_rename/test at line 141
#  file_rename/test line 141 is: ok($found_delete);
sh: 1: Syntax error: Bad fd number
file_rename/test .
Failed 4/5 subtests
sh: 1: Syntax error: Bad fd number
# Test 20 got: "256" (filter_exclude/test at line 167)
#Expected: "0"
#  filter_exclude/test line 167 is: ok( $result, 0 );
# Test 21 got: "0" (filter_exclude/test at line 179)
#Expected: "1"
#  filter_exclude/test line 179 is: ok( $found_msg, 1 );
sh: 1: Syntax error: Bad fd number
filter_exclude/test ..
Failed 2/21 subtests
sh: 1: cannot create /dev/udp/127.0.0.1/24242: Directory nonexistent
# Test 3 got: "256" (filter_saddr_fam/test at line 88)
#   Expected: "0"
#  filter_saddr_fam/test line 88 is: ok( $result, 0 );# Was 

Re: [PATCH v2 00/13] Unify asm/unaligned.h around struct helper

2021-12-17 Thread Segher Boessenkool
On Fri, Dec 17, 2021 at 12:34:53PM +, David Laight wrote:
> From: Segher Boessenkool
> > Sent: 16 December 2021 18:56
> ...
> > > The only remaining problem here is reinterpreting a char* pointer to a
> > > u32*, e.g., for accessing the IP address in an Ethernet frame when
> > > NET_IP_ALIGN == 2, which could suffer from the same UB problem again,
> > > as I understand it.
> > 
> > The problem is never casting a pointer to pointer to character type, and
> > then later back to an appriopriate pointer type.
> > These things are both required to work.
> 
> I think that is true of 'void *', not 'char *'.

No, see 6.3.2.3/7.  Both are allowed (and behave the same in fact).

> 'char' is special in that 'strict aliasing' doesn't apply to it.
> (Which is actually a pain sometimes.)

That has nothing to do with it.  Yes, you can validly access any memory
as a character type, but that has nothing to do with what pointer casts
are allowed and which are not.
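
To make the distinction concrete, a minimal illustration (constructed
for this point, not normative wording from the standard):

	#include <stdint.h>
	#include <string.h>

	uint32_t bits_of(float *f)
	{
		/* The casts themselves are fine (6.3.2.3/7): */
		char *c = (char *)f;
		float *back = (float *)c;	/* round trip is required to work */

		/* Accessing *f through an incompatible type would break the
		 * effective-type rule (6.5/7) under strict aliasing:
		 *
		 *	uint32_t bad = *(uint32_t *)f;
		 */

		/* The well-defined way to reinterpret the bytes: */
		uint32_t bits;
		memcpy(&bits, back, sizeof(bits));
		return bits;
	}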

> > The problem always is accessing something as if it
> > was something of another type, which is not valid C.  This however is
> > exactly what -fno-strict-aliasing allows, so that works as well.
> 
> IIRC the C language only allows you to have pointers to valid data items.
> (Since they can only be generated by the & operator on a valid item.)

Not so.  For example you are explicitly allowed to have pointers one
past the last element of an array (and do arithmetic on that!), and of
course null pointers are a thing.

C allows you to make up pointers from integers as well.  This is
perfectly fine to do.  Accessing anything via such pointers might well
be not standard C, of course.

> Indirecting any other pointer is probably UB!

If a pointer points to an object, indirecting it gives an lvalue of that
object.  It does not matter how you got that pointer, all that matters
is that it points at a valid object.

> This (sort of) allows the compiler to 'look through' casts to find
> what the actual type is (or might be).
> It can then use that information to make optimisation choices.
> This has caused grief with memcpy() calls that are trying to copy
> a structure that the coder knows is misaligned to an aligned buffer.

This is 6.5/7.

Alignment is 6.2.8 but it doesn't actually come into play at all here.

> So *(unaligned_ptr *)char_ptr probably has to work.

Only if the original pointer points to an object that is correct
(including correctly aligned) for such an lvalue.

> If the compiler can see *(unaligned_ptr *)(char *)int_ptr it can
> assume the alignment of the 'int_ptr' and do a single aligned access.

It is undefined behaviour to have an address in int_ptr that is not
correctly aligned for whatever type it points to.


Segher


RE: [PATCH v2 00/13] Unify asm/unaligned.h around struct helper

2021-12-17 Thread David Laight
From: Segher Boessenkool
> Sent: 16 December 2021 18:56
...
> > The only remaining problem here is reinterpreting a char* pointer to a
> > u32*, e.g., for accessing the IP address in an Ethernet frame when
> > NET_IP_ALIGN == 2, which could suffer from the same UB problem again,
> > as I understand it.
> 
> The problem is never casting a pointer to pointer to character type, and
> then later back to an appriopriate pointer type.
> These things are both required to work.

I think that is true of 'void *', not 'char *'.
'char' is special in that 'strict aliasing' doesn't apply to it.
(Which is actually a pain sometimes.)

> The problem always is accessing something as if it
> was something of another type, which is not valid C.  This however is
> exactly what -fno-strict-aliasing allows, so that works as well.

IIRC the C language only allows you to have pointers to valid data items.
(Since they can only be generated by the & operator on a valid item.)
Indirecting any other pointer is probably UB!

This (sort of) allows the compiler to 'look through' casts to find
what the actual type is (or might be).
It can then use that information to make optimisation choices.
This has caused grief with memcpy() calls that are trying to copy
a structure that the coder knows is misaligned to an aligned buffer.

So *(unaligned_ptr *)char_ptr probably has to work. But if the
compiler can see *(unaligned_ptr *)(char *)int_ptr, it can
assume the alignment of 'int_ptr' and do a single aligned access.

David




Re: [PATCH v3 11/12] lkdtm: Fix execute_[user]_location()

2021-12-17 Thread Christophe Leroy
Hi Kees,

On 17/10/2021 at 14:38, Christophe Leroy wrote:
> execute_location() and execute_user_location() intend
> to copy do_nothing() text and execute it at a new location.
> However, at the time being they don't copy the do_nothing() text
> but the do_nothing() function descriptor, which still points to the
> original text. So in the end do_nothing() is still executed at
> its original location, although using a copied function descriptor.
> 
> So fix that by really copying the do_nothing() text and building a new
> function descriptor: copy the do_nothing() function descriptor and
> update the target address to the new location.
> 
> Also fix the displayed addresses by dereferencing the do_nothing()
> function descriptor.
> 
> Signed-off-by: Christophe Leroy 

Do you have any comment to this patch and to patch 12 ?

If not, is it ok to get your acked-by ?

Thanks
Christophe
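
(For context on why copying do_nothing itself goes wrong: on ABIs with
function descriptors, e.g. powerpc64 ELFv1, a function "pointer" points
at a small descriptor rather than at the code. Roughly, with field
names as in this series' func_desc_t:

	struct func_desc {
		unsigned long addr;	/* actual entry point of the text */
		unsigned long toc;	/* TOC/GOT base for the function */
		unsigned long env;	/* environment pointer, unused by C */
	};

so memcpy(dst, do_nothing, EXEC_SIZE) copies the descriptor, not the
text, which is exactly what the patch fixes.)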

> ---
>   drivers/misc/lkdtm/perms.c | 37 -
>   1 file changed, 28 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
> index 035fcca441f0..1cf24c4a79e9 100644
> --- a/drivers/misc/lkdtm/perms.c
> +++ b/drivers/misc/lkdtm/perms.c
> @@ -44,19 +44,34 @@ static noinline void do_overwritten(void)
>   return;
>   }
>   
> +static void *setup_function_descriptor(func_desc_t *fdesc, void *dst)
> +{
> + if (!have_function_descriptors())
> + return dst;
> +
> + memcpy(fdesc, do_nothing, sizeof(*fdesc));
> + fdesc->addr = (unsigned long)dst;
> + barrier();
> +
> + return fdesc;
> +}
> +
>   static noinline void execute_location(void *dst, bool write)
>   {
> - void (*func)(void) = dst;
> + void (*func)(void);
> + func_desc_t fdesc;
> + void *do_nothing_text = dereference_function_descriptor(do_nothing);
>   
> - pr_info("attempting ok execution at %px\n", do_nothing);
> + pr_info("attempting ok execution at %px\n", do_nothing_text);
>   do_nothing();
>   
>   if (write == CODE_WRITE) {
> - memcpy(dst, do_nothing, EXEC_SIZE);
> + memcpy(dst, do_nothing_text, EXEC_SIZE);
>   flush_icache_range((unsigned long)dst,
>  (unsigned long)dst + EXEC_SIZE);
>   }
> - pr_info("attempting bad execution at %px\n", func);
> + pr_info("attempting bad execution at %px\n", dst);
> +	func = setup_function_descriptor(&fdesc, dst);
>   func();
>   pr_err("FAIL: func returned\n");
>   }
> @@ -66,16 +81,19 @@ static void execute_user_location(void *dst)
>   int copied;
>   
>   /* Intentionally crossing kernel/user memory boundary. */
> - void (*func)(void) = dst;
> + void (*func)(void);
> + func_desc_t fdesc;
> + void *do_nothing_text = dereference_function_descriptor(do_nothing);
>   
> - pr_info("attempting ok execution at %px\n", do_nothing);
> + pr_info("attempting ok execution at %px\n", do_nothing_text);
>   do_nothing();
>   
> - copied = access_process_vm(current, (unsigned long)dst, do_nothing,
> + copied = access_process_vm(current, (unsigned long)dst, do_nothing_text,
>  EXEC_SIZE, FOLL_WRITE);
>   if (copied < EXEC_SIZE)
>   return;
> - pr_info("attempting bad execution at %px\n", func);
> + pr_info("attempting bad execution at %px\n", dst);
> +	func = setup_function_descriptor(&fdesc, dst);
>   func();
>   pr_err("FAIL: func returned\n");
>   }
> @@ -153,7 +171,8 @@ void lkdtm_EXEC_VMALLOC(void)
>   
>   void lkdtm_EXEC_RODATA(void)
>   {
> - execute_location(lkdtm_rodata_do_nothing, CODE_AS_IS);
> + execute_location(dereference_function_descriptor(lkdtm_rodata_do_nothing),
> +  CODE_AS_IS);
>   }
>   
>   void lkdtm_EXEC_USERSPACE(void)
> 

[PATCH/RFC] mm: add and use batched version of __tlb_remove_table()

2021-12-17 Thread Nikita Yushchenko
When batched page table freeing via struct mmu_table_batch is used, the
final freeing in __tlb_remove_table_free() executes a loop, calling
arch hook __tlb_remove_table() to free each table individually.

Shift that loop down to archs. This allows archs to optimize it, by
freeing multiple tables in a single release_pages() call. This is
faster than individual put_page() calls, especially with memcg
accounting enabled.

Signed-off-by: Andrey Ryabinin 
Signed-off-by: Nikita Yushchenko 
---
 arch/arm/include/asm/tlb.h   |  5 
 arch/arm64/include/asm/tlb.h |  5 
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  8 +++
 arch/powerpc/include/asm/book3s/64/pgalloc.h |  1 +
 arch/powerpc/include/asm/nohash/pgalloc.h|  8 +++
 arch/powerpc/mm/book3s64/pgtable.c   |  8 +++
 arch/s390/include/asm/tlb.h  |  1 +
 arch/s390/mm/pgalloc.c   |  8 +++
 arch/sparc/include/asm/pgalloc_64.h  |  8 +++
 arch/x86/include/asm/tlb.h   |  5 
 include/asm-generic/tlb.h|  2 +-
 include/linux/swap.h |  5 +++-
 mm/mmu_gather.c  |  6 +
 mm/swap_state.c  | 24 +++-
 14 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index b8cbe03ad260..37f8a5193581 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -34,6 +34,11 @@ static inline void __tlb_remove_table(void *_table)
free_page_and_swap_cache((struct page *)_table);
 }
 
+static inline void __tlb_remove_tables(void **tables, int nr)
+{
+   free_pages_and_swap_cache_nolru((struct page **)tables, nr);
+}
+
 #include 
 
 static inline void
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index c995d1f4594f..c70dd428e1f6 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -16,6 +16,11 @@ static inline void __tlb_remove_table(void *_table)
free_page_and_swap_cache((struct page *)_table);
 }
 
+static inline void __tlb_remove_tables(void **tables, int nr)
+{
+   free_pages_and_swap_cache_nolru((struct page **)tables, nr);
+}
+
 #define tlb_flush tlb_flush
 static void tlb_flush(struct mmu_gather *tlb);
 
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h 
b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index dc5c039eb28e..880369de688a 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -66,6 +66,14 @@ static inline void __tlb_remove_table(void *_table)
pgtable_free(table, shift);
 }
 
+static inline void __tlb_remove_tables(void **tables, int nr)
+{
+   int i;
+
+   for (i = 0; i < nr; i++)
+   __tlb_remove_table(tables[i]);
+}
+
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
  unsigned long address)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h 
b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index e1af0b394ceb..f3dcd735e4ce 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -20,6 +20,7 @@ extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
 extern void pmd_fragment_free(unsigned long *);
 extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
 extern void __tlb_remove_table(void *_table);
+extern void __tlb_remove_tables(void **tables, int nr);
 void pte_frag_destroy(void *pte_frag);
 
 static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h 
b/arch/powerpc/include/asm/nohash/pgalloc.h
index 29c43665a753..170f5fda3dc1 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -63,6 +63,14 @@ static inline void __tlb_remove_table(void *_table)
pgtable_free(table, shift);
 }
 
+static inline void __tlb_remove_tables(void **tables, int nr)
+{
+   int i;
+
+   for (i = 0; i < nr; i++)
+   __tlb_remove_table(tables[i]);
+}
+
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
  unsigned long address)
 {
diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
b/arch/powerpc/mm/book3s64/pgtable.c
index 9e16c7b1a6c5..f95fb42fadfa 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -412,6 +412,14 @@ void __tlb_remove_table(void *_table)
return pgtable_free(table, index);
 }
 
+void __tlb_remove_tables(void **tables, int nr)
+{
+   int i;
+
+   for (i = 0; i < nr; i++)
+   __tlb_remove_table(tables[i]);
+}
+
 #ifdef CONFIG_PROC_FS
 atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 

[PATCH v6 14/14] powerpc: Simplify and move arch_randomize_brk()

2021-12-17 Thread Christophe Leroy
arch_randomize_brk() is only needed for hash on book3s/64, for other
platforms the one provided by the default mmap layout is good enough.

Move it to hash_utils.c and use randomize_page() like the generic one.

And properly opt out for the radix case instead of relying on an
assumption about mmu_highuser_ssize.

Also change to a 32M range like most other architectures instead of 8M.
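
For reference, randomize_page() returns a page-aligned address in
[start, start + range); a close paraphrase of the mm/util.c helper:

	unsigned long randomize_page(unsigned long start, unsigned long range)
	{
		if (!PAGE_ALIGNED(start)) {
			range -= PAGE_ALIGN(start) - start;
			start = PAGE_ALIGN(start);
		}

		if (start > ULONG_MAX - range)
			range = ULONG_MAX - start;

		range >>= PAGE_SHIFT;
		if (range == 0)
			return start;

		return start + (get_random_long() % range << PAGE_SHIFT);
	}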

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/process.c | 41 ---
 arch/powerpc/mm/book3s64/hash_utils.c | 19 +
 2 files changed, 19 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 984813a4d5dc..e7f809bdd433 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -34,10 +34,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
 #include 
 #include 
 
@@ -2313,42 +2311,3 @@ unsigned long arch_align_stack(unsigned long sp)
sp -= get_random_int() & ~PAGE_MASK;
return sp & ~0xf;
 }
-
-static inline unsigned long brk_rnd(void)
-{
-unsigned long rnd = 0;
-
-   /* 8MB for 32bit, 1GB for 64bit */
-   if (is_32bit_task())
-   rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
-   else
-   rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
-
-   return rnd << PAGE_SHIFT;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-   unsigned long base = mm->brk;
-   unsigned long ret;
-
-#ifdef CONFIG_PPC_BOOK3S_64
-   /*
-* If we are using 1TB segments and we are allowed to randomise
-* the heap, we can put it above 1TB so it is backed by a 1TB
-* segment. Otherwise the heap will be in the bottom 1TB
-* which always uses 256MB segments and this may result in a
-* performance penalty.
-*/
-	if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
-   base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
-#endif
-
-   ret = PAGE_ALIGN(base + brk_rnd());
-
-   if (ret < mm->brk)
-   return mm->brk;
-
-   return ret;
-}
-
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c 
b/arch/powerpc/mm/book3s64/hash_utils.c
index 7ecadf5e6bf9..68a5468b0f19 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -37,6 +37,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -2171,3 +2173,20 @@ void __init print_system_hash_info(void)
if (htab_hash_mask)
pr_info("htab_hash_mask= 0x%lx\n", htab_hash_mask);
 }
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+   /*
+* If we are using 1TB segments and we are allowed to randomise
+* the heap, we can put it above 1TB so it is backed by a 1TB
+* segment. Otherwise the heap will be in the bottom 1TB
+* which always uses 256MB segments and this may result in a
+* performance penalty.
+*/
+   if (is_32bit_task())
+   return randomize_page(mm->brk, SZ_32M);
+   else if (!radix_enabled() && mmu_highuser_ssize == MMU_SEGSIZE_1T)
+		return randomize_page(max_t(unsigned long, mm->brk, SZ_1T), SZ_1G);
+   else
+   return randomize_page(mm->brk, SZ_1G);
+}
-- 
2.33.1


[PATCH v6 13/14] powerpc/mm: Convert to default topdown mmap layout

2021-12-17 Thread Christophe Leroy
Select CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT and
remove arch/powerpc/mm/mmap.c

This change reuses the generic framework added by
commit 67f3977f805b ("arm64, mm: move generic mmap layout
functions to mm") without any functional change.

Comparison between powerpc implementation and the generic one:
- mmap_is_legacy() is identical.
- arch_mmap_rnd() does exactly the same although it's written
slightly differently.
- MIN_GAP and MAX_GAP are identical.
- mmap_base() does the same but uses STACK_RND_MASK which provides
the same values as stack_maxrandom_size().
- arch_pick_mmap_layout() is identical.
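
As an example of the equivalence, the generic mmap_is_legacy() in
mm/util.c (paraphrased) matches the powerpc version removed below:

	static int mmap_is_legacy(struct rlimit *rlim_stack)
	{
		if (current->personality & ADDR_COMPAT_LAYOUT)
			return 1;

		if (rlim_stack->rlim_cur == RLIM_INFINITY)
			return 1;

		return sysctl_legacy_va_layout;
	}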

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/Kconfig |   2 +-
 arch/powerpc/include/asm/processor.h |   2 -
 arch/powerpc/mm/Makefile |   2 +-
 arch/powerpc/mm/mmap.c   | 105 ---
 4 files changed, 2 insertions(+), 109 deletions(-)
 delete mode 100644 arch/powerpc/mm/mmap.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 0631c9241af3..b4ae3d8bde46 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -122,7 +122,6 @@ config PPC
select ARCH_HAS_DEBUG_WXif STRICT_KERNEL_RWX
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_MAP_DIRECT  if PPC_PSERIES
-   select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_HUGEPD  if HUGETLB_PAGE
@@ -158,6 +157,7 @@ config PPC
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS  if PPC_QUEUED_SPINLOCKS
select ARCH_USE_QUEUED_SPINLOCKSif PPC_QUEUED_SPINLOCKS
+   select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_LD_ORPHAN_WARN
diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 2c8686d9e964..873adaab20c8 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -392,8 +392,6 @@ static inline void prefetchw(const void *x)
 
 #define spin_lock_prefetch(x)  prefetchw(x)
 
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
 /* asm stubs */
 extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
 extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index d4c20484dad9..503a6e249940 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,7 +5,7 @@
 
 ccflags-$(CONFIG_PPC64):= $(NO_MINIMAL_TOC)
 
-obj-y				:= fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \
+obj-y				:= fault.o mem.o pgtable.o maccess.o pageattr.o \
   init_$(BITS).o pgtable_$(BITS).o \
   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
   init-common.o mmu_context.o drmem.o \
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
deleted file mode 100644
index d9eae456558a..
--- a/arch/powerpc/mm/mmap.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  flexible mmap layout support
- *
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
- * All Rights Reserved.
- *
- * Started by Ingo Molnar 
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave at least a ~128 MB hole.
- */
-#define MIN_GAP (128*1024*1024)
-#define MAX_GAP (TASK_SIZE/6*5)
-
-static inline int mmap_is_legacy(struct rlimit *rlim_stack)
-{
-   if (current->personality & ADDR_COMPAT_LAYOUT)
-   return 1;
-
-   if (rlim_stack->rlim_cur == RLIM_INFINITY)
-   return 1;
-
-   return sysctl_legacy_va_layout;
-}
-
-unsigned long arch_mmap_rnd(void)
-{
-   unsigned long shift, rnd;
-
-   shift = mmap_rnd_bits;
-#ifdef CONFIG_COMPAT
-   if (is_32bit_task())
-   shift = mmap_rnd_compat_bits;
-#endif
-   rnd = get_random_long() % (1ul << shift);
-
-   return rnd << PAGE_SHIFT;
-}
-
-static inline unsigned long stack_maxrandom_size(void)
-{
-   if (!(current->flags & PF_RANDOMIZE))
-   return 0;
-
-   /* 8MB for 32bit, 1GB for 64bit */
-   if (is_32bit_task())
-   return (1<<23);
-   else
-   return (1<<30);
-}
-
-static inline unsigned long mmap_base(unsigned long rnd,
- struct rlimit *rlim_stack)
-{
-   unsigned long gap = rlim_stack->rlim_cur;
-   unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
-
-   /* Values close to RLIM_INFINITY can overflow. */
-   if (gap + pad > gap)
-   gap += pad;
-
-   if (gap < MIN_GAP)
-   gap = MIN_GAP;
-   else 

[PATCH v6 12/14] powerpc/mm: Enable full randomisation of memory mappings

2021-12-17 Thread Christophe Leroy
Do like most other architectures and provide randomisation also to
"legacy" memory mappings, by adding the random factor to
mm->mmap_base in arch_pick_mmap_layout().

See commit 8b8addf891de ("x86/mm/32: Enable full randomization on
i386 and X86_32") for all explanations and benefits of that mmap
randomisation.

At the moment, slice_find_area_bottomup() doesn't use mm->mmap_base
but uses the fixed TASK_UNMAPPED_BASE instead.
slice_find_area_bottomup() being used as a fallback to
slice_find_area_topdown(), it can't use mm->mmap_base
directly.

Instead of always using TASK_UNMAPPED_BASE as base address, leave
it to the caller. When called from slice_find_area_topdown()
TASK_UNMAPPED_BASE is used. Otherwise mm->mmap_base is used.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/book3s64/slice.c | 18 +++---
 arch/powerpc/mm/mmap.c   |  2 +-
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index 03681042b807..c0b58afb9a47 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -276,20 +276,18 @@ static bool slice_scan_available(unsigned long addr,
 }
 
 static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
-					      unsigned long len,
+					      unsigned long addr, unsigned long len,
 					      const struct slice_mask *available,
 					      int psize, unsigned long high_limit)
 {
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
-   unsigned long addr, found, next_end;
+   unsigned long found, next_end;
struct vm_unmapped_area_info info;
 
info.flags = 0;
info.length = len;
info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
info.align_offset = 0;
-
-   addr = TASK_UNMAPPED_BASE;
/*
 * Check till the allow max value for this mmap request
 */
@@ -322,12 +320,12 @@ static unsigned long slice_find_area_bottomup(struct 
mm_struct *mm,
 }
 
 static unsigned long slice_find_area_topdown(struct mm_struct *mm,
-					     unsigned long len,
+					     unsigned long addr, unsigned long len,
 					     const struct slice_mask *available,
 					     int psize, unsigned long high_limit)
 {
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
-   unsigned long addr, found, prev;
+   unsigned long found, prev;
struct vm_unmapped_area_info info;
unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr);
 
@@ -335,8 +333,6 @@ static unsigned long slice_find_area_topdown(struct 
mm_struct *mm,
info.length = len;
info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
info.align_offset = 0;
-
-   addr = mm->mmap_base;
/*
 * If we are trying to allocate above DEFAULT_MAP_WINDOW
 * Add the different to the mmap_base.
@@ -377,7 +373,7 @@ static unsigned long slice_find_area_topdown(struct 
mm_struct *mm,
 * can happen with large stack limits and large mmap()
 * allocations.
 */
-   return slice_find_area_bottomup(mm, len, available, psize, high_limit);
+	return slice_find_area_bottomup(mm, TASK_UNMAPPED_BASE, len, available, psize, high_limit);
 }
 
 
@@ -386,9 +382,9 @@ static unsigned long slice_find_area(struct mm_struct *mm, 
unsigned long len,
 int topdown, unsigned long high_limit)
 {
if (topdown)
-		return slice_find_area_topdown(mm, len, mask, psize, high_limit);
+		return slice_find_area_topdown(mm, mm->mmap_base, len, mask, psize, high_limit);
 	else
-		return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
+		return slice_find_area_bottomup(mm, mm->mmap_base, len, mask, psize, high_limit);
 }
 
 static inline void slice_copy_mask(struct slice_mask *dst,
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 5972d619d274..d9eae456558a 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -96,7 +96,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct 
rlimit *rlim_stack)
 * bit is set, or if the expected stack growth is unlimited:
 */
if (mmap_is_legacy(rlim_stack)) {
-   mm->mmap_base = TASK_UNMAPPED_BASE;
+   mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor, rlim_stack);
-- 
2.33.1


[PATCH v6 10/14] powerpc/mm: Use generic_hugetlb_get_unmapped_area()

2021-12-17 Thread Christophe Leroy
Use the generic version of arch_hugetlb_get_unmapped_area()
which is now available at all time.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/64/hugetlb.h |  4 --
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 55 
 arch/powerpc/mm/hugetlbpage.c|  4 +-
 3 files changed, 1 insertion(+), 62 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h 
b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 12e150e615b7..b37a28f62cf6 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -8,10 +8,6 @@
  */
 void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long 
vmaddr);
 void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long 
vmaddr);
-extern unsigned long
-radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-   unsigned long len, unsigned long pgoff,
-   unsigned long flags);
 
 extern void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c 
b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index 23d3e08911d3..d2fb776febb4 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -41,61 +41,6 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct 
*vma, unsigned long st
radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
 }
 
-/*
- * A vairant of hugetlb_get_unmapped_area doing topdown search
- * FIXME!! should we do as x86 does or non hugetlb area does ?
- * ie, use topdown or not based on mmap_is_legacy check ?
- */
-unsigned long
-radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-   unsigned long len, unsigned long pgoff,
-   unsigned long flags)
-{
-   struct mm_struct *mm = current->mm;
-   struct vm_area_struct *vma;
-   struct hstate *h = hstate_file(file);
-   int fixed = (flags & MAP_FIXED);
-   unsigned long high_limit;
-   struct vm_unmapped_area_info info;
-
-   high_limit = DEFAULT_MAP_WINDOW;
-   if (addr >= high_limit || (fixed && (addr + len > high_limit)))
-   high_limit = TASK_SIZE;
-
-   if (len & ~huge_page_mask(h))
-   return -EINVAL;
-   if (len > high_limit)
-   return -ENOMEM;
-
-   if (fixed) {
-   if (addr > high_limit - len)
-   return -ENOMEM;
-   if (prepare_hugepage_range(file, addr, len))
-   return -EINVAL;
-   return addr;
-   }
-
-   if (addr) {
-   addr = ALIGN(addr, huge_page_size(h));
-   vma = find_vma(mm, addr);
-   if (high_limit - len >= addr && addr >= mmap_min_addr &&
-   (!vma || addr + len <= vm_start_gap(vma)))
-   return addr;
-   }
-   /*
-* We are always doing an topdown search here. Slice code
-* does that too.
-*/
-   info.flags = VM_UNMAPPED_AREA_TOPDOWN;
-   info.length = len;
-   info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-   info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
-   info.align_mask = PAGE_MASK & ~huge_page_mask(h);
-   info.align_offset = 0;
-
-	return vm_unmapped_area(&info);
-}
-
 void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep,
 pte_t old_pte, pte_t pte)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index f18b3a1d18f0..bfd7f4af1e58 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -553,11 +553,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, 
unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
 {
-#ifdef CONFIG_PPC_RADIX_MMU
 	if (radix_enabled())
-		return radix__hugetlb_get_unmapped_area(file, addr, len,
+		return generic_hugetlb_get_unmapped_area(file, addr, len,
 							 pgoff, flags);
-#endif
 #ifdef CONFIG_PPC_64S_HASH_MMU
 	return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1);
 #endif
-- 
2.33.1


[PATCH v6 09/14] powerpc/mm: Use generic_get_unmapped_area() and call it from arch_get_unmapped_area()

2021-12-17 Thread Christophe Leroy
Use the generic version of arch_get_unmapped_area() which
is now available at all time instead of its copy
radix__arch_get_unmapped_area()

To allow that for PPC64, add arch_get_mmap_base() and
arch_get_mmap_end() macros.

Instead of setting mm->get_unmapped_area() to either
arch_get_unmapped_area() or generic_get_unmapped_area(),
always set it to arch_get_unmapped_area() and call
generic_get_unmapped_area() from there when radix is enabled.

Do the same with radix__arch_get_unmapped_area_topdown()

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/task_size_64.h |   8 ++
 arch/powerpc/mm/mmap.c  | 127 ++--
 2 files changed, 14 insertions(+), 121 deletions(-)

diff --git a/arch/powerpc/include/asm/task_size_64.h 
b/arch/powerpc/include/asm/task_size_64.h
index c993482237ed..0544764b32c3 100644
--- a/arch/powerpc/include/asm/task_size_64.h
+++ b/arch/powerpc/include/asm/task_size_64.h
@@ -76,4 +76,12 @@
 #define STACK_TOP_MAX TASK_SIZE_USER64
 #define STACK_TOP (is_32bit_task() ? STACK_TOP_USER32 : STACK_TOP_USER64)
 
+#define arch_get_mmap_base(addr, base) \
+	(((addr) > DEFAULT_MAP_WINDOW) ? (base) + TASK_SIZE - DEFAULT_MAP_WINDOW : (base))
+
+#define arch_get_mmap_end(addr, len, flags) \
+	(((addr) > DEFAULT_MAP_WINDOW) || \
+	 (((flags) & MAP_FIXED) && ((addr) + (len) > DEFAULT_MAP_WINDOW)) ? TASK_SIZE : \
+									    DEFAULT_MAP_WINDOW)
+
 #endif /* _ASM_POWERPC_TASK_SIZE_64_H */
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 9b0d6e395bc0..46781d0103d1 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -81,115 +81,15 @@ static inline unsigned long mmap_base(unsigned long rnd,
 }
 
 #ifdef HAVE_ARCH_UNMAPPED_AREA
-#ifdef CONFIG_PPC_RADIX_MMU
-/*
- * Same function as generic code used only for radix, because we don't need to overload
- * the generic one. But we will have to duplicate, because hash select
- * HAVE_ARCH_UNMAPPED_AREA
- */
-static unsigned long
-radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
-unsigned long len, unsigned long pgoff,
-unsigned long flags)
-{
-   struct mm_struct *mm = current->mm;
-   struct vm_area_struct *vma;
-   int fixed = (flags & MAP_FIXED);
-   unsigned long high_limit;
-   struct vm_unmapped_area_info info;
-
-   high_limit = DEFAULT_MAP_WINDOW;
-   if (addr >= high_limit || (fixed && (addr + len > high_limit)))
-   high_limit = TASK_SIZE;
-
-   if (len > high_limit)
-   return -ENOMEM;
-
-   if (fixed) {
-   if (addr > high_limit - len)
-   return -ENOMEM;
-   return addr;
-   }
-
-   if (addr) {
-   addr = PAGE_ALIGN(addr);
-   vma = find_vma(mm, addr);
-   if (high_limit - len >= addr && addr >= mmap_min_addr &&
-   (!vma || addr + len <= vm_start_gap(vma)))
-   return addr;
-   }
-
-   info.flags = 0;
-   info.length = len;
-   info.low_limit = mm->mmap_base;
-   info.high_limit = high_limit;
-   info.align_mask = 0;
-
-	return vm_unmapped_area(&info);
-}
-
-static unsigned long
-radix__arch_get_unmapped_area_topdown(struct file *filp,
-const unsigned long addr0,
-const unsigned long len,
-const unsigned long pgoff,
-const unsigned long flags)
-{
-   struct vm_area_struct *vma;
-   struct mm_struct *mm = current->mm;
-   unsigned long addr = addr0;
-   int fixed = (flags & MAP_FIXED);
-   unsigned long high_limit;
-   struct vm_unmapped_area_info info;
-
-   high_limit = DEFAULT_MAP_WINDOW;
-   if (addr >= high_limit || (fixed && (addr + len > high_limit)))
-   high_limit = TASK_SIZE;
-
-   if (len > high_limit)
-   return -ENOMEM;
-
-   if (fixed) {
-   if (addr > high_limit - len)
-   return -ENOMEM;
-   return addr;
-   }
-
-   if (addr) {
-   addr = PAGE_ALIGN(addr);
-   vma = find_vma(mm, addr);
-   if (high_limit - len >= addr && addr >= mmap_min_addr &&
-   (!vma || addr + len <= vm_start_gap(vma)))
-   return addr;
-   }
-
-   info.flags = VM_UNMAPPED_AREA_TOPDOWN;
-   info.length = len;
-   info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-   info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
-   info.align_mask = 0;
-
-	addr = vm_unmapped_area(&info);
-   if (!(addr & ~PAGE_MASK))
-   return addr;
-   VM_BUG_ON(addr != -ENOMEM);
-
-   /*
-* A failed mmap() very likely causes application failure,
-

[PATCH v6 11/14] powerpc/mm: Move get_unmapped_area functions to slice.c

2021-12-17 Thread Christophe Leroy
hugetlb_get_unmapped_area() is now identical to the
generic version if only RADIX is enabled, so move it
to slice.c and let it fallback on the generic one
when HASH MMU is not compiled in.

Do the same with arch_get_unmapped_area() and
arch_get_unmapped_area_topdown().

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/64/mmu.h   |  6 
 arch/powerpc/include/asm/book3s/64/slice.h |  6 
 arch/powerpc/mm/book3s64/slice.c   | 42 ++
 arch/powerpc/mm/hugetlbpage.c  | 21 ---
 arch/powerpc/mm/mmap.c | 36 ---
 5 files changed, 48 insertions(+), 63 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h 
b/arch/powerpc/include/asm/book3s/64/mmu.h
index 7fee46e50377..310ca3597d58 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -4,12 +4,6 @@
 
 #include 
 
-#ifdef CONFIG_HUGETLB_PAGE
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-#define HAVE_ARCH_UNMAPPED_AREA
-#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-
 #ifndef __ASSEMBLY__
 /*
  * Page size definition
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
index 5b0f7105bc8b..b8eb4ad271b9 100644
--- a/arch/powerpc/include/asm/book3s/64/slice.h
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -4,6 +4,12 @@
 
 #ifndef __ASSEMBLY__
 
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
 #define SLICE_LOW_SHIFT	28
 #define SLICE_LOW_TOP	(0x100000000ul)
 #define SLICE_NUM_LOW  (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index e4382713746d..03681042b807 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -639,6 +639,32 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 }
 EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
 
+unsigned long arch_get_unmapped_area(struct file *filp,
+unsigned long addr,
+unsigned long len,
+unsigned long pgoff,
+unsigned long flags)
+{
+   if (radix_enabled())
+   return generic_get_unmapped_area(filp, addr, len, pgoff, flags);
+
+   return slice_get_unmapped_area(addr, len, flags,
+  mm_ctx_user_psize(&current->mm->context), 0);
+}
+
+unsigned long arch_get_unmapped_area_topdown(struct file *filp,
+const unsigned long addr0,
+const unsigned long len,
+const unsigned long pgoff,
+const unsigned long flags)
+{
+   if (radix_enabled())
+   return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags);
+
+   return slice_get_unmapped_area(addr0, len, flags,
+  mm_ctx_user_psize(&current->mm->context), 1);
+}
+
 unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr)
 {
unsigned char *psizes;
@@ -766,4 +792,20 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 
	return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, vma->vm_start));
 }
+
+static int file_to_psize(struct file *file)
+{
+   struct hstate *hstate = hstate_file(file);
+   return shift_to_mmu_psize(huge_page_shift(hstate));
+}
+
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+   unsigned long len, unsigned long pgoff,
+   unsigned long flags)
+{
+   if (radix_enabled())
+   return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
+
+   return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1);
+}
 #endif
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index bfd7f4af1e58..eb9de09e49a3 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -542,27 +542,6 @@ struct page *follow_huge_pd(struct vm_area_struct *vma,
return page;
 }
 
-#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-static inline int file_to_psize(struct file *file)
-{
-   struct hstate *hstate = hstate_file(file);
-   return shift_to_mmu_psize(huge_page_shift(hstate));
-}
-
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-   unsigned long len, unsigned long pgoff,
-   unsigned long flags)
-{
-   if (radix_enabled())
-   return generic_hugetlb_get_unmapped_area(file, addr, len,
-  

[PATCH v6 08/14] powerpc/mm: Remove CONFIG_PPC_MM_SLICES

2021-12-17 Thread Christophe Leroy
CONFIG_PPC_MM_SLICES is always selected by hash book3s/64.
CONFIG_PPC_MM_SLICES is never selected by other platforms.

Remove it.

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/hugetlb.h |  2 +-
 arch/powerpc/include/asm/paca.h|  7 ---
 arch/powerpc/kernel/paca.c |  5 -
 arch/powerpc/mm/book3s64/Makefile  |  3 +--
 arch/powerpc/mm/book3s64/hash_utils.c  | 14 --
 arch/powerpc/mm/hugetlbpage.c  |  2 +-
 arch/powerpc/mm/mmap.c |  4 ++--
 arch/powerpc/platforms/Kconfig.cputype |  4 
 8 files changed, 5 insertions(+), 36 deletions(-)

diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index f18c543bc01d..86a60ba6bd2a 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -24,7 +24,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 unsigned long addr,
 unsigned long len)
 {
-   if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled())
+   if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU) && !radix_enabled())
return slice_is_hugepage_only_range(mm, addr, len);
return 0;
 }
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 295573a82c66..bd4dd02e61c8 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -152,16 +152,9 @@ struct paca_struct {
struct tlb_core_data tcd;
 #endif /* CONFIG_PPC_BOOK3E */
 
-#ifdef CONFIG_PPC_BOOK3S
 #ifdef CONFIG_PPC_64S_HASH_MMU
-#ifdef CONFIG_PPC_MM_SLICES
unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
-#else
-   u16 mm_ctx_user_psize;
-   u16 mm_ctx_sllp;
-#endif
-#endif
 #endif
 
/*
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 39da688a9455..ba593fd60124 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -344,15 +344,10 @@ void copy_mm_to_paca(struct mm_struct *mm)
 {
	mm_context_t *context = &mm->context;
 
-#ifdef CONFIG_PPC_MM_SLICES
VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
	memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
   LOW_SLICE_ARRAY_SZ);
	memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
   TASK_SLICE_ARRAY_SZ(context));
-#else /* CONFIG_PPC_MM_SLICES */
-   get_paca()->mm_ctx_user_psize = context->user_psize;
-   get_paca()->mm_ctx_sllp = context->sllp;
-#endif
 }
 #endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index af2f3e75d458..d527dc8e30a8 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -5,7 +5,7 @@ ccflags-y   := $(NO_MINIMAL_TOC)
 obj-y  += mmu_context.o pgtable.o trace.o
 ifdef CONFIG_PPC_64S_HASH_MMU
 CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
-obj-y  += hash_pgtable.o hash_utils.o hash_tlb.o slb.o
+obj-y  += hash_pgtable.o hash_utils.o hash_tlb.o slb.o slice.o
 obj-$(CONFIG_PPC_HASH_MMU_NATIVE)  += hash_native.o
 obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
 obj-$(CONFIG_PPC_64K_PAGES)+= hash_64k.o
@@ -21,7 +21,6 @@ obj-$(CONFIG_PPC_RADIX_MMU)   += radix_hugetlbpage.o
 endif
 obj-$(CONFIG_SPAPR_TCE_IOMMU)  += iommu_api.o
 obj-$(CONFIG_PPC_PKEY) += pkeys.o
-obj-$(CONFIG_PPC_MM_SLICES)+= slice.o
 
 # Instrumenting the SLB fault path can lead to duplicate SLB entries
 KCOV_INSTRUMENT_slb.o := n
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index eced266dc5e9..7ecadf5e6bf9 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1264,7 +1264,6 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
return pp;
 }
 
-#ifdef CONFIG_PPC_MM_SLICES
 static unsigned int get_paca_psize(unsigned long addr)
 {
unsigned char *psizes;
@@ -1281,12 +1280,6 @@ static unsigned int get_paca_psize(unsigned long addr)
return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
 }
 
-#else
-unsigned int get_paca_psize(unsigned long addr)
-{
-   return get_paca()->mm_ctx_user_psize;
-}
-#endif
 
 /*
  * Demote a segment to using 4k pages.
@@ -1710,7 +1703,6 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
return 0;
 }
 
-#ifdef CONFIG_PPC_MM_SLICES
 static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
 {
int psize = get_slice_psize(mm, ea);
@@ -1727,12 +1719,6 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
 
return true;
 }
-#else
-static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
-{
-   return true;
-}
-#endif
 
 

[PATCH v6 07/14] powerpc/mm: Make slice specific to book3s/64

2021-12-17 Thread Christophe Leroy
Since commit 555904d07eef ("powerpc/8xx: MM_SLICE is not needed
anymore") only book3s/64 selects CONFIG_PPC_MM_SLICES.

Move slice.c into mm/book3s64/

Move the necessary definitions into asm/book3s/64/slice.h and
remove asm/slice.h

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  1 +
 arch/powerpc/include/asm/book3s/64/slice.h| 18 
 arch/powerpc/include/asm/page.h   |  1 -
 arch/powerpc/include/asm/slice.h  | 46 ---
 arch/powerpc/mm/Makefile  |  1 -
 arch/powerpc/mm/book3s64/Makefile |  1 +
 arch/powerpc/mm/{ => book3s64}/slice.c|  2 -
 arch/powerpc/mm/nohash/mmu_context.c  |  9 
 arch/powerpc/mm/nohash/tlb.c  |  4 --
 9 files changed, 20 insertions(+), 63 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/slice.h
 rename arch/powerpc/mm/{ => book3s64}/slice.c (99%)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 21f780942911..1c4eebbc69c9 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -18,6 +18,7 @@
  * complete pgtable.h but only a portion of it.
  */
 #include <asm/book3s/64/pgtable.h>
+#include <asm/book3s/64/slice.h>
 #include <asm/bug.h>
 #include <asm/processor.h>
 
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
index f0d3194ba41b..5b0f7105bc8b 100644
--- a/arch/powerpc/include/asm/book3s/64/slice.h
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
 #define _ASM_POWERPC_BOOK3S_64_SLICE_H
 
+#ifndef __ASSEMBLY__
+
 #define SLICE_LOW_SHIFT	28
 #define SLICE_LOW_TOP	(0x100000000ul)
 #define SLICE_NUM_LOW  (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
@@ -13,4 +15,20 @@
 
 #define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64
 
+struct mm_struct;
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ unsigned long flags, unsigned int psize,
+ int topdown);
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+  unsigned long len, unsigned int psize);
+
+void slice_init_new_context_exec(struct mm_struct *mm);
+void slice_setup_new_exec(void);
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 254687258f42..62e0c6f12869 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -329,6 +329,5 @@ static inline unsigned long kaslr_offset(void)
 
 #include <asm-generic/memory_model.h>
 #endif /* __ASSEMBLY__ */
-#include <asm/slice.h>
 
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
deleted file mode 100644
index 0bdd9c62eca0..000000000000
--- a/arch/powerpc/include/asm/slice.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_SLICE_H
-#define _ASM_POWERPC_SLICE_H
-
-#ifdef CONFIG_PPC_BOOK3S_64
-#include <asm/book3s/64/slice.h>
-#endif
-
-#ifndef __ASSEMBLY__
-
-struct mm_struct;
-
-#ifdef CONFIG_PPC_MM_SLICES
-
-#ifdef CONFIG_HUGETLB_PAGE
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-#define HAVE_ARCH_UNMAPPED_AREA
-#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-
-unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
- unsigned long flags, unsigned int psize,
- int topdown);
-
-unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
-
-void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
-  unsigned long len, unsigned int psize);
-
-void slice_init_new_context_exec(struct mm_struct *mm);
-void slice_setup_new_exec(void);
-
-#else /* CONFIG_PPC_MM_SLICES */
-
-static inline void slice_init_new_context_exec(struct mm_struct *mm) {}
-
-static inline unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
-{
-   return 0;
-}
-
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_POWERPC_SLICE_H */
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index df8172da2301..d4c20484dad9 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_PPC_MMU_NOHASH)  += nohash/
 obj-$(CONFIG_PPC_BOOK3S_32)+= book3s32/
 obj-$(CONFIG_PPC_BOOK3S_64)+= book3s64/
 obj-$(CONFIG_NUMA) += numa.o
-obj-$(CONFIG_PPC_MM_SLICES)+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_PPC_COPRO_BASE)   += copro_fault.o
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index 2d50cac499c5..af2f3e75d458 100644

[PATCH v6 06/14] powerpc/mm: Move vma_mmu_pagesize()

2021-12-17 Thread Christophe Leroy
vma_mmu_pagesize() is only required for slices; otherwise
there is a generic weak version doing exactly the same thing.

Move it to slice.c
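
For reference, the weak-default pattern the commit message relies on looks
roughly like the sketch below (a sketch only: the exact qualifiers and the
mm/hugetlb.c location are stated from memory, treat them as assumptions):

#include <linux/mm.h>

/* Generic default, used unless an architecture links a strong definition. */
unsigned long __weak vma_mmu_pagesize(struct vm_area_struct *vma)
{
	/* On most architectures the MMU page size matches the kernel's. */
	return vma_kernel_pagesize(vma);
}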

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/mm/hugetlbpage.c | 11 ---
 arch/powerpc/mm/slice.c   |  9 +
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index ddead41e2194..0eec3b61bd13 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -565,17 +565,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 }
 #endif
 
-unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
-{
-   /* With radix we don't use slice, so derive it from vma*/
-   if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) {
-   unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
-
-   return 1UL << mmu_psize_to_shift(psize);
-   }
-   return vma_kernel_pagesize(vma);
-}
-
 bool __init arch_hugetlb_valid_size(unsigned long size)
 {
int shift = __ffs(size);
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index f42711f865f3..8a3ac062b71e 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -759,4 +759,13 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 
return !slice_check_range_fits(mm, maskp, addr, len);
 }
+
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+   /* With radix we don't use slice, so derive it from vma*/
+   if (radix_enabled())
+   return vma_kernel_pagesize(vma);
+
+   return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, vma->vm_start));
+}
 #endif
-- 
2.33.1


[PATCH v6 05/14] sizes.h: Add SZ_1T macro

2021-12-17 Thread Christophe Leroy
Today drivers/pci/controller/pci-xgene.c defines SZ_1T itself.

Move it into linux/sizes.h so that it can be re-used elsewhere.

Cc: Toan Le 
Cc: linux-...@vger.kernel.org
Signed-off-by: Christophe Leroy 
Reviewed-by: Krzysztof Wilczyński 
Acked-by: Bjorn Helgaas 
---
 drivers/pci/controller/pci-xgene.c | 1 -
 include/linux/sizes.h  | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c
index 56d0d50338c8..716dcab5ca47 100644
--- a/drivers/pci/controller/pci-xgene.c
+++ b/drivers/pci/controller/pci-xgene.c
@@ -49,7 +49,6 @@
 #define EN_REG 0x00000001
 #define OB_LO_IO   0x00000002
 #define XGENE_PCIE_DEVICEID	0xE004
-#define SZ_1T  (SZ_1G*1024ULL)
 #define PIPE_PHY_RATE_RD(src)  ((0xc000 & (u32)(src)) >> 0xe)
 
 #define XGENE_V1_PCI_EXP_CAP   0x40
diff --git a/include/linux/sizes.h b/include/linux/sizes.h
index 1ac79bcee2bb..84aa448d8bb3 100644
--- a/include/linux/sizes.h
+++ b/include/linux/sizes.h
@@ -47,6 +47,8 @@
 #define SZ_8G  _AC(0x200000000, ULL)
 #define SZ_16G _AC(0x400000000, ULL)
 #define SZ_32G _AC(0x800000000, ULL)
+
+#define SZ_1T  _AC(0x10000000000, ULL)
 #define SZ_64T _AC(0x400000000000, ULL)
 
 #endif /* __LINUX_SIZES_H__ */
-- 
2.33.1


[PATCH v6 04/14] mm, hugetlbfs: Allow for "high" userspace addresses

2021-12-17 Thread Christophe Leroy
This is a complement of f6795053dac8 ("mm: mmap: Allow for "high"
userspace addresses") for hugetlb.

This patch adds support for "high" userspace addresses that are
optionally supported on the system and have to be requested via a hint
mechanism ("high" addr parameter to mmap).

Architectures such as powerpc and x86 achieve this by making changes to
their architectural versions of hugetlb_get_unmapped_area() function.
However, arm64 uses the generic version of that function.

So take into account arch_get_mmap_base() and arch_get_mmap_end() in
hugetlb_get_unmapped_area(). To allow that, move those two macros
out of mm/mmap.c into include/linux/sched/mm.h

If these macros are not defined in architectural code then they default
to (TASK_SIZE) and (base), so they should not introduce any behavioural
change for architectures that do not define them.

For the time being, only ARM64 is affected by this change.
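
For illustration, a minimal user-space sketch of the hint mechanism, assuming
an arm64 kernel with 52-bit VA support (the hint value is hypothetical):

#include <sys/mman.h>

static void *map_high_hugepage(size_t len)
{
	/*
	 * A hint above the default 48-bit window asks the kernel to place
	 * the mapping in the extended "high" range; with a NULL hint the
	 * mapping stays below DEFAULT_MAP_WINDOW.
	 */
	return mmap((void *)(1UL << 50), len, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
}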

Signed-off-by: Christophe Leroy 
Cc: Steve Capper 
Cc: Will Deacon 
Cc: Catalin Marinas 
---
 fs/hugetlbfs/inode.c | 9 +
 include/linux/sched/mm.h | 8 
 mm/mmap.c| 8 
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c7cde4e5924d..a8d3b0899b60 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -205,7 +205,7 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = current->mm->mmap_base;
-   info.high_limit = TASK_SIZE;
+   info.high_limit = arch_get_mmap_end(addr, len, flags);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
	return vm_unmapped_area(&info);
@@ -221,7 +221,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-   info.high_limit = current->mm->mmap_base;
+   info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
	addr = vm_unmapped_area(&info);
@@ -236,7 +236,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = current->mm->mmap_base;
-   info.high_limit = TASK_SIZE;
+   info.high_limit = arch_get_mmap_end(addr, len, flags);
	addr = vm_unmapped_area(&info);
}
 
@@ -251,6 +251,7 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct hstate *h = hstate_file(file);
+   const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
 
if (len & ~huge_page_mask(h))
return -EINVAL;
@@ -266,7 +267,7 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
-   if (TASK_SIZE - len >= addr &&
+   if (mmap_end - len >= addr &&
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 2584f7c13f69..cc9d80bd36d5 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -135,6 +135,14 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 #endif /* CONFIG_MEMCG */
 
 #ifdef CONFIG_MMU
+#ifndef arch_get_mmap_end
+#define arch_get_mmap_end(addr, len, flags)(TASK_SIZE)
+#endif
+
+#ifndef arch_get_mmap_base
+#define arch_get_mmap_base(addr, base) (base)
+#endif
+
 extern void arch_pick_mmap_layout(struct mm_struct *mm,
  struct rlimit *rlim_stack);
 extern unsigned long
diff --git a/mm/mmap.c b/mm/mmap.c
index ad48f7af7511..c773b5ad9a11 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2112,14 +2112,6 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
return addr;
 }
 
-#ifndef arch_get_mmap_end
-#define arch_get_mmap_end(addr, len, flags)(TASK_SIZE)
-#endif
-
-#ifndef arch_get_mmap_base
-#define arch_get_mmap_base(addr, base) (base)
-#endif
-
 /* Get an address range which is currently unmapped.
  * For shmat() with addr=0.
  *
-- 
2.33.1


[PATCH v6 03/14] mm: Add len and flags parameters to arch_get_mmap_end()

2021-12-17 Thread Christophe Leroy
Powerpc needs flags and len to make its decision in arch_get_mmap_end().

So add them as parameters to arch_get_mmap_end().
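
A sketch of the kind of override this enables, modelled on the powerpc
definition later in the series (treat the exact expression as an assumption):

/*
 * Hypothetical arch override: a hint above the default window, or a
 * MAP_FIXED request that would cross it, lifts the limit to the full
 * task size.
 */
#define arch_get_mmap_end(addr, len, flags) \
	(((addr) > DEFAULT_MAP_WINDOW || \
	  (((flags) & MAP_FIXED) && (addr) + (len) > DEFAULT_MAP_WINDOW)) ? \
	 TASK_SIZE : DEFAULT_MAP_WINDOW)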

Signed-off-by: Christophe Leroy 
Cc: Steve Capper 
Cc: Catalin Marinas 
Cc: Will Deacon 
---
 arch/arm64/include/asm/processor.h | 4 ++--
 mm/mmap.c  | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 6f41b65f9962..88c696350ace 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -91,8 +91,8 @@
 #endif /* CONFIG_COMPAT */
 
 #ifndef CONFIG_ARM64_FORCE_52BIT
-#define arch_get_mmap_end(addr) ((addr > DEFAULT_MAP_WINDOW) ? TASK_SIZE :\
-   DEFAULT_MAP_WINDOW)
+#define arch_get_mmap_end(addr, len, flags) ((addr > DEFAULT_MAP_WINDOW) ? TASK_SIZE :\
+						DEFAULT_MAP_WINDOW)
 
 #define arch_get_mmap_base(addr, base) ((addr > DEFAULT_MAP_WINDOW) ? \
base + TASK_SIZE - DEFAULT_MAP_WINDOW :\
diff --git a/mm/mmap.c b/mm/mmap.c
index 7ac6a07ff382..ad48f7af7511 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2113,7 +2113,7 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
 }
 
 #ifndef arch_get_mmap_end
-#define arch_get_mmap_end(addr)(TASK_SIZE)
+#define arch_get_mmap_end(addr, len, flags)(TASK_SIZE)
 #endif
 
 #ifndef arch_get_mmap_base
@@ -2139,7 +2139,7 @@ generic_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
struct vm_unmapped_area_info info;
-   const unsigned long mmap_end = arch_get_mmap_end(addr);
+   const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
 
if (len > mmap_end - mmap_min_addr)
return -ENOMEM;
@@ -2187,7 +2187,7 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
struct vm_area_struct *vma, *prev;
struct mm_struct *mm = current->mm;
struct vm_unmapped_area_info info;
-   const unsigned long mmap_end = arch_get_mmap_end(addr);
+   const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
 
/* requested length too big for entire address space */
if (len > mmap_end - mmap_min_addr)
-- 
2.33.1


[PATCH v6 02/14] mm, hugetlbfs: Allow an arch to always use generic versions of get_unmapped_area functions

2021-12-17 Thread Christophe Leroy
Unlike most architectures, powerpc can only decide at runtime
whether it is going to use the generic arch_get_unmapped_area() or not.

Today, powerpc carries a copy of the generic arch_get_unmapped_area()
because when HAVE_ARCH_UNMAPPED_AREA is selected the generic
arch_get_unmapped_area() is not available.

Rename it generic_get_unmapped_area() and make it independent of
HAVE_ARCH_UNMAPPED_AREA.

Do the same for arch_get_unmapped_area_topdown() versus
HAVE_ARCH_UNMAPPED_AREA_TOPDOWN.

Do the same for hugetlb_get_unmapped_area() versus
HAVE_ARCH_HUGETLB_UNMAPPED_AREA.
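
In miniature, the resulting pattern is: the generic implementation gets an
unconditional name, and the arch hook becomes a thin default wrapper (a
sketch of the idea, not the exact kernel code):

#include <linux/mm.h>

/* Built unconditionally, whatever the architecture selects: */
unsigned long generic_get_unmapped_area(struct file *filp, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags);

#ifndef HAVE_ARCH_UNMAPPED_AREA
/*
 * Default hook; an arch selecting HAVE_ARCH_UNMAPPED_AREA provides its
 * own arch_get_unmapped_area() and may still call the generic helper at
 * runtime, which is what powerpc does for Radix.
 */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
				     unsigned long len, unsigned long pgoff,
				     unsigned long flags)
{
	return generic_get_unmapped_area(filp, addr, len, pgoff, flags);
}
#endif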

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
---
 fs/hugetlbfs/inode.c | 17 +
 include/linux/hugetlb.h  |  5 +
 include/linux/sched/mm.h |  9 +
 mm/mmap.c| 31 ---
 4 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 49d2e686be74..c7cde4e5924d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -195,7 +195,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
  * Called under mmap_write_lock(mm).
  */
 
-#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 static unsigned long
 hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
@@ -244,9 +243,10 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
return addr;
 }
 
-static unsigned long
-hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-   unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long
+generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
 {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -282,6 +282,15 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
 }
+
+#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+static unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+   return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
+}
 #endif
 
 static size_t
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 00351ccb49a3..df899d1937ff 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -513,6 +513,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long flags);
 #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
 
+unsigned long
+generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
+
 /*
  * huegtlb page specific state flags.  These flags are located in page.private
  * of the hugetlb head page.  Functions created via the below macros should be
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index aca874d33fe6..2584f7c13f69 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -144,6 +144,15 @@ extern unsigned long
 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
  unsigned long len, unsigned long pgoff,
  unsigned long flags);
+
+unsigned long
+generic_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
+unsigned long
+generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
 #else
 static inline void arch_pick_mmap_layout(struct mm_struct *mm,
 struct rlimit *rlim_stack) {}
diff --git a/mm/mmap.c b/mm/mmap.c
index bfb0ea164a90..7ac6a07ff382 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2131,10 +2131,10 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
  *
  * This function "knows" that -ENOMEM has the bits set.
  */
-#ifndef HAVE_ARCH_UNMAPPED_AREA
 unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
-   unsigned long len, unsigned long pgoff, unsigned long flags)
+generic_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
 {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
@@ -2164,17 +2164,25 @@ 

[PATCH v6 01/14] mm: Allow arch specific arch_randomize_brk() with CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT

2021-12-17 Thread Christophe Leroy
Commit e7142bf5d231 ("arm64, mm: make randomization selected by
generic topdown mmap layout") introduced a default version of
arch_randomize_brk() provided when
CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT is selected.

powerpc could select CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
but needs to provide its own arch_randomize_brk().

In order to allow that, define the generic version of arch_randomize_brk()
as a __weak symbol.
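
A sketch of the link-time behaviour this relies on; the two definitions live
in separate translation units, and the arch-side body shown is hypothetical,
not the actual powerpc code:

#include <linux/mm.h>
#include <linux/sizes.h>

/* mm/util.c: weak default, used when nothing overrides it. */
unsigned long __weak arch_randomize_brk(struct mm_struct *mm)
{
	return randomize_page(mm->brk, SZ_1G);
}

/*
 * Arch code: a plain (strong) definition silently wins at link time,
 * with no #ifdef needed in core mm.
 */
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	return randomize_page(mm->brk, is_32bit_task() ? SZ_32M : SZ_1G);
}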

Cc: Alexandre Ghiti 
Signed-off-by: Christophe Leroy 
---
 mm/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/util.c b/mm/util.c
index 741ba32a43ac..46d1a2dd7a32 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -344,7 +344,7 @@ unsigned long randomize_stack_top(unsigned long stack_top)
 }
 
 #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
-unsigned long arch_randomize_brk(struct mm_struct *mm)
+unsigned long __weak arch_randomize_brk(struct mm_struct *mm)
 {
/* Is the current task 32bit ? */
if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
-- 
2.33.1


[PATCH v6 00/14] Convert powerpc to default topdown mmap layout

2021-12-17 Thread Christophe Leroy
Rebased on top of powerpc/next branch

This series converts powerpc to default topdown mmap layout.

powerpc requires its own arch_get_unmapped_area() only when
slices are needed, which is only for book3s/64. First part of
the series moves slices into book3s/64 specific directories
and cleans up other subarchitectures.

Last part converts to default topdown mmap layout.

A small modification is done to core mm to allow
powerpc to still provide its own arch_randomize_brk().

Another modification is done to core mm to allow powerpc
to use generic versions of get_unmapped_area functions for Radix
while still providing its own implementation for Hash, the
selection between Radix and Hash being done at runtime.

Last modification to core mm is to give len and flags to
arch_get_mmap_end().

Signed-off-by: Christophe Leroy 

Changes in v6:
- New patch (patch 4) to take arch_get_mmap_base() and arch_get_mmap_end() into account in generic hugetlb_get_unmapped_area()
- Get back arch_randomize_brk() simplification as it relies on default topdown mmap layout.
- Fixed precedence between || and && in powerpc's arch_get_mmap_end() (patch 9)

Changes in v5:
- Added patch 3
- Added arch_get_mmap_base() and arch_get_mmap_end() to patch 7 to better match original powerpc behaviour
- Switched patches 10 and 11 and performed full randomisation in patch 10 just before switching to the default implementation, as suggested by Nic.

Changes in v4:
- Move arch_randomize_brk() simplification out of this series
- Add a change to core mm to enable using generic implementation
while providing arch specific one at the same time.
- Reworked radix get_unmapped_area to use generic implementation
- Rebase on top of Nic's series v6

Changes in v3:
- Fixed missing  in last patch
- Added a patch to move SZ_1T out of drivers/pci/controller/pci-xgene.c

Changes in v2:
- Moved patch 4 before patch 2
- Make generic arch_randomize_brk() __weak
- Added patch 9

Christophe Leroy (14):
  mm: Allow arch specific arch_randomize_brk() with
CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
  mm, hugetlbfs: Allow an arch to always use generic versions of
get_unmapped_area functions
  mm: Add len and flags parameters to arch_get_mmap_end()
  mm, hugetlbfs: Allow for "high" userspace addresses
  sizes.h: Add SZ_1T macro
  powerpc/mm: Move vma_mmu_pagesize()
  powerpc/mm: Make slice specific to book3s/64
  powerpc/mm: Remove CONFIG_PPC_MM_SLICES
  powerpc/mm: Use generic_get_unmapped_area() and call it from
arch_get_unmapped_area()
  powerpc/mm: Use generic_hugetlb_get_unmapped_area()
  powerpc/mm: Move get_unmapped_area functions to slice.c
  powerpc/mm: Enable full randomisation of memory mappings
  powerpc/mm: Convert to default topdown mmap layout
  powerpc: Simplify and move arch_randomize_brk()

 arch/arm64/include/asm/processor.h|   4 +-
 arch/powerpc/Kconfig  |   2 +-
 arch/powerpc/include/asm/book3s/64/hugetlb.h  |   4 -
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |   1 +
 arch/powerpc/include/asm/book3s/64/mmu.h  |   6 -
 arch/powerpc/include/asm/book3s/64/slice.h|  24 ++
 arch/powerpc/include/asm/hugetlb.h|   2 +-
 arch/powerpc/include/asm/paca.h   |   7 -
 arch/powerpc/include/asm/page.h   |   1 -
 arch/powerpc/include/asm/processor.h  |   2 -
 arch/powerpc/include/asm/slice.h  |  46 
 arch/powerpc/include/asm/task_size_64.h   |   8 +
 arch/powerpc/kernel/paca.c|   5 -
 arch/powerpc/kernel/process.c |  41 ---
 arch/powerpc/mm/Makefile  |   3 +-
 arch/powerpc/mm/book3s64/Makefile |   2 +-
 arch/powerpc/mm/book3s64/hash_utils.c |  33 ++-
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c  |  55 
 arch/powerpc/mm/{ => book3s64}/slice.c|  71 -
 arch/powerpc/mm/hugetlbpage.c |  34 ---
 arch/powerpc/mm/mmap.c| 256 --
 arch/powerpc/mm/nohash/mmu_context.c  |   9 -
 arch/powerpc/mm/nohash/tlb.c  |   4 -
 arch/powerpc/platforms/Kconfig.cputype|   4 -
 drivers/pci/controller/pci-xgene.c|   1 -
 fs/hugetlbfs/inode.c  |  26 +-
 include/linux/hugetlb.h   |   5 +
 include/linux/sched/mm.h  |  17 ++
 include/linux/sizes.h |   2 +
 mm/mmap.c |  43 +--
 mm/util.c |   2 +-
 31 files changed, 185 insertions(+), 535 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/slice.h
 rename arch/powerpc/mm/{ => book3s64}/slice.c (91%)
 delete mode 100644 arch/powerpc/mm/mmap.c

-- 
2.33.1


Re: [PATCH v2 1/2] powerpc/set_memory: Avoid spinlock recursion in change_page_attr()

2021-12-17 Thread Maxime Bizon


On Thu, 2021-12-16 at 17:47 +, Christophe Leroy wrote:

Tested-by: Maxime Bizon 

Now running fine with every CONFIG_DEBUG_xxx enabled, thanks!

-- 
Maxime