[PATCH V5 7/7] mm/mmap: Drop arch_vm_get_page_prot()

2022-04-11 Thread Anshuman Khandual
There are no platforms left which use arch_vm_get_page_prot(). Just drop
generic arch_vm_get_page_prot().

Cc: Andrew Morton 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org
Reviewed-by: Catalin Marinas 
Signed-off-by: Anshuman Khandual 
---
 include/linux/mman.h | 4 
 mm/mmap.c| 3 +--
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/include/linux/mman.h b/include/linux/mman.h
index b66e91b8176c..58b3abd457a3 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -93,10 +93,6 @@ static inline void vm_unacct_memory(long pages)
 #define arch_calc_vm_flag_bits(flags) 0
 #endif
 
-#ifndef arch_vm_get_page_prot
-#define arch_vm_get_page_prot(vm_flags) __pgprot(0)
-#endif
-
 #ifndef arch_validate_prot
 /*
  * This is called from mprotect().  PROT_GROWSDOWN and PROT_GROWSUP have
diff --git a/mm/mmap.c b/mm/mmap.c
index edf2a5e38f4d..db7f33154206 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -110,8 +110,7 @@ pgprot_t protection_map[16] __ro_after_init = {
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
-   (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
-   pgprot_val(arch_vm_get_page_prot(vm_flags)));
+   (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]));
 
return ret;
 }
-- 
2.25.1



[PATCH V5 6/7] mm/mmap: Drop arch_filter_pgprot()

2022-04-11 Thread Anshuman Khandual
There are no platforms left which subscribe to ARCH_HAS_FILTER_PGPROT. Hence
drop the generic arch_filter_pgprot() and the config ARCH_HAS_FILTER_PGPROT.

Cc: Andrew Morton 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org
Reviewed-by: Catalin Marinas 
Signed-off-by: Anshuman Khandual 
---
 mm/Kconfig | 3 ---
 mm/mmap.c  | 9 +
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index b1f7624276f8..3f7b6d7b69df 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -762,9 +762,6 @@ config ARCH_HAS_CURRENT_STACK_POINTER
  register alias named "current_stack_pointer", this config can be
  selected.
 
-config ARCH_HAS_FILTER_PGPROT
-   bool
-
 config ARCH_HAS_VM_GET_PAGE_PROT
bool
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 87cb2eaf7e1a..edf2a5e38f4d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -107,20 +107,13 @@ pgprot_t protection_map[16] __ro_after_init = {
 };
 
 #ifndef CONFIG_ARCH_HAS_VM_GET_PAGE_PROT
-#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
-static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
-{
-   return prot;
-}
-#endif
-
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
pgprot_val(arch_vm_get_page_prot(vm_flags)));
 
-   return arch_filter_pgprot(ret);
+   return ret;
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 #endif /* CONFIG_ARCH_HAS_VM_GET_PAGE_PROT */
-- 
2.25.1



[PATCH V5 5/7] x86/mm: Enable ARCH_HAS_VM_GET_PAGE_PROT

2022-04-11 Thread Anshuman Khandual
From: Christoph Hellwig 

This defines and exports a platform-specific custom vm_get_page_prot() by
subscribing to ARCH_HAS_VM_GET_PAGE_PROT. It also unsubscribes from the
config ARCH_HAS_FILTER_PGPROT, after dropping arch_filter_pgprot() and
arch_vm_get_page_prot().

Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Christoph Hellwig 
Signed-off-by: Anshuman Khandual 
---
 arch/x86/Kconfig |  2 +-
 arch/x86/include/asm/pgtable.h   |  5 -
 arch/x86/include/uapi/asm/mman.h | 14 -
 arch/x86/mm/Makefile |  2 +-
 arch/x86/mm/pgprot.c | 35 
 5 files changed, 37 insertions(+), 21 deletions(-)
 create mode 100644 arch/x86/mm/pgprot.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b0142e01002e..c355c420150e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -76,7 +76,6 @@ config X86
select ARCH_HAS_EARLY_DEBUG if KGDB
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
-   select ARCH_HAS_FILTER_PGPROT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
	select ARCH_HAS_KCOV			if X86_64
@@ -95,6 +94,7 @@ config X86
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_UBSAN_SANITIZE_ALL
+   select ARCH_HAS_VM_GET_PAGE_PROT
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_ZONE_DMA_SET if EXPERT
select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 62ab07e24aef..3563f4645fa1 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -649,11 +649,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, 
pgprot_t newprot)
 
 #define canon_pgprot(p) __pgprot(massage_pgprot(p))
 
-static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
-{
-   return canon_pgprot(prot);
-}
-
 static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
 enum page_cache_mode pcm,
 enum page_cache_mode new_pcm)
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index d4a8d0424bfb..775dbd3aff73 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -5,20 +5,6 @@
 #define MAP_32BIT	0x40		/* only give out 32bit addresses */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-/*
- * Take the 4 protection key bits out of the vma->vm_flags
- * value and turn them in to the bits that we can put in
- * to a pte.
- *
- * Only override these if Protection Keys are available
- * (which is only on 64-bit).
- */
-#define arch_vm_get_page_prot(vm_flags)__pgprot(   \
-   ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \
-   ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \
-   ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \
-   ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0))
-
 #define arch_calc_vm_prot_bits(prot, key) (\
((key) & 0x1 ? VM_PKEY_BIT0 : 0) |  \
((key) & 0x2 ? VM_PKEY_BIT1 : 0) |  \
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fe3d3061fc11..fb6b41a48ae5 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -20,7 +20,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o  = -pg
 endif
 
 obj-y				:=	init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
-					pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
+					pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o pgprot.o
 
 obj-y  += pat/
 
diff --git a/arch/x86/mm/pgprot.c b/arch/x86/mm/pgprot.c
new file mode 100644
index ..763742782286
--- /dev/null
+++ b/arch/x86/mm/pgprot.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+#include 
+#include 
+
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+   unsigned long val = pgprot_val(protection_map[vm_flags &
+ (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+   /*
+* Take the 4 protection key bits out of the vma->vm_flags value and
+* turn them in to the bits that we can put in to a pte.
+*
+* Only override these if Protection Keys are available (which is only
+* on 64-bit).
+*/
+   if (vm_flags & VM_PKEY_BIT0)
+   val |= _PAGE_PKEY_BIT0;
+   if (vm_flags & VM_PKEY_BIT1)
+   val |= _PAGE_PKEY_BIT1;
+   if (vm_flags & VM_PKEY_BIT2)
+   val |= _PAGE_PKEY_BIT2;
+   if (vm_flags & VM_PKEY_BIT3)
+   val |= _PAGE_PKEY_BIT3;
+#endif
+
+   val = 

[PATCH V5 4/7] sparc/mm: Enable ARCH_HAS_VM_GET_PAGE_PROT

2022-04-11 Thread Anshuman Khandual
This defines and exports a platform-specific custom vm_get_page_prot() by
subscribing to ARCH_HAS_VM_GET_PAGE_PROT. It localizes arch_vm_get_page_prot()
as sparc_vm_get_page_prot() and moves it near vm_get_page_prot().

Cc: "David S. Miller" 
Cc: Khalid Aziz 
Cc: sparcli...@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
Reviewed-by: Khalid Aziz 
Signed-off-by: Anshuman Khandual 
---
 arch/sparc/Kconfig|  1 +
 arch/sparc/include/asm/mman.h |  6 --
 arch/sparc/mm/init_64.c   | 13 +
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 9200bc04701c..85b573643af6 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -84,6 +84,7 @@ config SPARC64
select PERF_USE_VMALLOC
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select HAVE_C_RECORDMCOUNT
+   select ARCH_HAS_VM_GET_PAGE_PROT
select HAVE_ARCH_AUDITSYSCALL
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h
index 274217e7ed70..af9c10c83dc5 100644
--- a/arch/sparc/include/asm/mman.h
+++ b/arch/sparc/include/asm/mman.h
@@ -46,12 +46,6 @@ static inline unsigned long sparc_calc_vm_prot_bits(unsigned 
long prot)
}
 }
 
-#define arch_vm_get_page_prot(vm_flags) sparc_vm_get_page_prot(vm_flags)
-static inline pgprot_t sparc_vm_get_page_prot(unsigned long vm_flags)
-{
-   return (vm_flags & VM_SPARC_ADI) ? __pgprot(_PAGE_MCD_4V) : __pgprot(0);
-}
-
 #define arch_validate_prot(prot, addr) sparc_validate_prot(prot, addr)
 static inline int sparc_validate_prot(unsigned long prot, unsigned long addr)
 {
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 8b1911591581..dcb17763c1f2 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -3184,3 +3184,16 @@ void copy_highpage(struct page *to, struct page *from)
}
 }
 EXPORT_SYMBOL(copy_highpage);
+
+static pgprot_t sparc_vm_get_page_prot(unsigned long vm_flags)
+{
+   return (vm_flags & VM_SPARC_ADI) ? __pgprot(_PAGE_MCD_4V) : __pgprot(0);
+}
+
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+   return __pgprot(pgprot_val(protection_map[vm_flags &
+   (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
+   pgprot_val(sparc_vm_get_page_prot(vm_flags)));
+}
+EXPORT_SYMBOL(vm_get_page_prot);
-- 
2.25.1



[PATCH V5 3/7] arm64/mm: Enable ARCH_HAS_VM_GET_PAGE_PROT

2022-04-11 Thread Anshuman Khandual
This defines and exports a platform-specific custom vm_get_page_prot() by
subscribing to ARCH_HAS_VM_GET_PAGE_PROT. It localizes arch_vm_get_page_prot()
and moves it near vm_get_page_prot().

Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-ker...@vger.kernel.org
Reviewed-by: Catalin Marinas 
Signed-off-by: Anshuman Khandual 
---
 arch/arm64/Kconfig|  1 +
 arch/arm64/include/asm/mman.h | 24 
 arch/arm64/mm/mmap.c  | 25 +
 3 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 57c4c995965f..dd0b15162bb3 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -45,6 +45,7 @@ config ARM64
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+   select ARCH_HAS_VM_GET_PAGE_PROT
select ARCH_HAS_ZONE_DMA_SET if EXPERT
select ARCH_HAVE_ELF_PROT
select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index e3e28f7daf62..5966ee4a6154 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -35,30 +35,6 @@ static inline unsigned long arch_calc_vm_flag_bits(unsigned 
long flags)
 }
 #define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
 
-static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
-{
-   pteval_t prot = 0;
-
-   if (vm_flags & VM_ARM64_BTI)
-   prot |= PTE_GP;
-
-   /*
-* There are two conditions required for returning a Normal Tagged
-* memory type: (1) the user requested it via PROT_MTE passed to
-* mmap() or mprotect() and (2) the corresponding vma supports MTE. We
-* register (1) as VM_MTE in the vma->vm_flags and (2) as
-* VM_MTE_ALLOWED. Note that the latter can only be set during the
-* mmap() call since mprotect() does not accept MAP_* flags.
-* Checking for VM_MTE only is sufficient since arch_validate_flags()
-* does not permit (VM_MTE & !VM_MTE_ALLOWED).
-*/
-   if (vm_flags & VM_MTE)
-   prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED);
-
-   return __pgprot(prot);
-}
-#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
-
 static inline bool arch_validate_prot(unsigned long prot,
unsigned long addr __always_unused)
 {
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 77ada00280d9..78e9490f748d 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -55,3 +55,28 @@ static int __init adjust_protection_map(void)
return 0;
 }
 arch_initcall(adjust_protection_map);
+
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+   pteval_t prot = pgprot_val(protection_map[vm_flags &
+  (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
+
+   if (vm_flags & VM_ARM64_BTI)
+   prot |= PTE_GP;
+
+   /*
+* There are two conditions required for returning a Normal Tagged
+* memory type: (1) the user requested it via PROT_MTE passed to
+* mmap() or mprotect() and (2) the corresponding vma supports MTE. We
+* register (1) as VM_MTE in the vma->vm_flags and (2) as
+* VM_MTE_ALLOWED. Note that the latter can only be set during the
+* mmap() call since mprotect() does not accept MAP_* flags.
+* Checking for VM_MTE only is sufficient since arch_validate_flags()
+* does not permit (VM_MTE & !VM_MTE_ALLOWED).
+*/
+   if (vm_flags & VM_MTE)
+   prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED);
+
+   return __pgprot(prot);
+}
+EXPORT_SYMBOL(vm_get_page_prot);
-- 
2.25.1



[PATCH V5 2/7] powerpc/mm: Enable ARCH_HAS_VM_GET_PAGE_PROT

2022-04-11 Thread Anshuman Khandual
This defines and exports a platform-specific custom vm_get_page_prot() by
subscribing to ARCH_HAS_VM_GET_PAGE_PROT. While here, it also localizes
arch_vm_get_page_prot() as __vm_get_page_prot() and moves it near
vm_get_page_prot().

Cc: Michael Ellerman 
Cc: Paul Mackerras 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/Kconfig   |  1 +
 arch/powerpc/include/asm/mman.h| 12 
 arch/powerpc/mm/book3s64/pgtable.c | 20 
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 174edabb74fa..69e44358a235 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -140,6 +140,7 @@ config PPC
select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAS_VM_GET_PAGE_PROT	if PPC_BOOK3S_64
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 7cb6d18f5cd6..1b024e64c8ec 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -24,18 +24,6 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned 
long prot,
 }
 #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
 
-static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
-{
-#ifdef CONFIG_PPC_MEM_KEYS
-   return (vm_flags & VM_SAO) ?
-   __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
-   __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
-#else
-   return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
-#endif
-}
-#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
-
 static inline bool arch_validate_prot(unsigned long prot, unsigned long addr)
 {
if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO))
diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
b/arch/powerpc/mm/book3s64/pgtable.c
index 052e6590f84f..d0319524e27f 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -7,6 +7,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -549,3 +550,22 @@ unsigned long memremap_compat_align(void)
 }
 EXPORT_SYMBOL_GPL(memremap_compat_align);
 #endif
+
+static pgprot_t __vm_get_page_prot(unsigned long vm_flags)
+{
+#ifdef CONFIG_PPC_MEM_KEYS
+   return (vm_flags & VM_SAO) ?
+   __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
+   __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
+#else
+   return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
+#endif
+}
+
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+   return __pgprot(pgprot_val(protection_map[vm_flags &
+   (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
+   pgprot_val(__vm_get_page_prot(vm_flags)));
+}
+EXPORT_SYMBOL(vm_get_page_prot);
-- 
2.25.1



[PATCH V5 1/7] mm/mmap: Add new config ARCH_HAS_VM_GET_PAGE_PROT

2022-04-11 Thread Anshuman Khandual
Add a new config ARCH_HAS_VM_GET_PAGE_PROT which, when subscribed to, enables
a given platform to define its own vm_get_page_prot() while still utilizing
the generic protection_map[] array.
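
As an illustration (not part of this patch): with this option a platform
selects ARCH_HAS_VM_GET_PAGE_PROT from its Kconfig and then defines and
exports its own vm_get_page_prot(), which still indexes into the generic
protection_map[] before applying any platform specific bits. A rough sketch
of the expected shape (the extra-bits step is hypothetical here):

	/* arch/<arch>/mm/... - sketch only */
	pgprot_t vm_get_page_prot(unsigned long vm_flags)
	{
		pgprot_t prot = protection_map[vm_flags &
				(VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];

		/* apply platform specific protection bits derived from vm_flags here */
		return prot;
	}
	EXPORT_SYMBOL(vm_get_page_prot);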

Cc: Andrew Morton 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org
Reviewed-by: Catalin Marinas 
Suggested-by: Christoph Hellwig 
Signed-off-by: Anshuman Khandual 
---
 mm/Kconfig | 3 +++
 mm/mmap.c  | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/mm/Kconfig b/mm/Kconfig
index 034d87953600..b1f7624276f8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -765,6 +765,9 @@ config ARCH_HAS_CURRENT_STACK_POINTER
 config ARCH_HAS_FILTER_PGPROT
bool
 
+config ARCH_HAS_VM_GET_PAGE_PROT
+   bool
+
 config ARCH_HAS_PTE_DEVMAP
bool
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 3aa839f81e63..87cb2eaf7e1a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -106,6 +106,7 @@ pgprot_t protection_map[16] __ro_after_init = {
__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
 };
 
+#ifndef CONFIG_ARCH_HAS_VM_GET_PAGE_PROT
 #ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
 static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
 {
@@ -122,6 +123,7 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
return arch_filter_pgprot(ret);
 }
 EXPORT_SYMBOL(vm_get_page_prot);
+#endif /* CONFIG_ARCH_HAS_VM_GET_PAGE_PROT */
 
 static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
 {
-- 
2.25.1



[PATCH V5 0/7] mm/mmap: Drop arch_vm_get_page_prot() and arch_filter_pgprot()

2022-04-11 Thread Anshuman Khandual
protection_map[] is an array based construct that translates a given vm_flags
combination into a page protection value. The array is populated by the
platform via the exported [__S000 .. __S111] and [__P000 .. __P111] macros.
The primary user of protection_map[] is vm_get_page_prot(), which determines
the page protection value for a given vm_flags. The vm_get_page_prot()
implementation may in turn call the platform overrides arch_vm_get_page_prot()
and arch_filter_pgprot(). Some platforms also override protection_map[]
entries, originally built from __SXXX/__PXXX, with different runtime values.
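
To make the indexing concrete, vm_get_page_prot() masks vm_flags down to the
VM_READ, VM_WRITE, VM_EXEC and VM_SHARED bits and uses the result as the
array index; a small worked sketch of that generic behaviour:

	/* VM_READ = 0x1, VM_WRITE = 0x2, VM_EXEC = 0x4, VM_SHARED = 0x8 */
	unsigned long vm_flags = VM_READ | VM_WRITE;	/* private read/write mapping */

	/* index = 0b0011 = 3, i.e. the __P011 slot of protection_map[16] */
	pgprot_t prot = protection_map[vm_flags &
			(VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];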

Currently there are multiple layers of abstraction, i.e. the __SXXX/__PXXX
macros, protection_map[], arch_vm_get_page_prot() and arch_filter_pgprot(),
built between the platform and generic MM, which together define
vm_get_page_prot().

Hence this series proposes to drop the latter two abstraction levels and
instead move the responsibility of defining vm_get_page_prot() to the
platform itself (still utilizing the generic protection_map[] array), making
the arrangement clean and simple.

The series first introduces ARCH_HAS_VM_GET_PAGE_PROT, which enables
platforms to define a custom vm_get_page_prot(). It then converts the
platforms that define the arch_filter_pgprot() or arch_vm_get_page_prot()
overrides, which allows those constructs to be dropped completely.

The series was inspired by an earlier discussion with Christoph Hellwig

https://lore.kernel.org/all/1632712920-8171-1-git-send-email-anshuman.khand...@arm.com/

This series applies on 5.18-rc2.

This series has been cross built for multiple platforms.

- Anshuman

Cc: Christoph Hellwig 
Cc: Andrew Morton 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: sparcli...@vger.kernel.org
Cc: linux...@kvack.org
Cc: linux-a...@vger.kernel.org
Cc: linux-ker...@vger.kernel.org

Changes in V5:

- Collected new tags on various patches in the series
- Coalesced arm64_arch_vm_get_page_prot() into vm_get_page_prot() per Catalin
- Modified powerpc's vm_get_page_prot() implementation per Christophe

Changes in V4:

https://lore.kernel.org/all/20220407103251.1209606-1-anshuman.khand...@arm.com/

- ARCH_HAS_VM_GET_PAGE_PROT now excludes generic protection_map[]
- Changed platform's vm_get_page_prot() to use generic protection_map[]
- Dropped all platform changes not enabling either arch_vm_get_page_prot() or 
arch_filter_pgprot() 
- Dropped all previous tags as code base has changed

Changes in V3:

https://lore.kernel.org/all/1646045273-9343-1-git-send-email-anshuman.khand...@arm.com/

- Dropped variable 'i' from sme_early_init() on x86 platform
- Moved CONFIG_COLDFIRE vm_get_page_prot() inside arch/m68k/mm/mcfmmu.c
- Moved CONFIG_SUN3 vm_get_page_prot() inside arch/m68k/mm/sun3mmu.c
- Dropped cachebits for vm_get_page_prot() inside arch/m68k/mm/motorola.c
- Dropped PAGE_XXX_C definitions from arch/m68k/include/asm/motorola_pgtable.h
- Used PAGE_XXX instead for vm_get_page_prot() inside arch/m68k/mm/motorola.c
- Dropped all references to protection_map[] in the tree
- Replaced s/extensa/xtensa/ on the patch title
- Moved left over comments from pgtable.h into init.c on nios2 platform

Changes in V2:

https://lore.kernel.org/all/1645425519-9034-1-git-send-email-anshuman.khand...@arm.com/

- Dropped the entire comment block in [PATCH 30/30] per Geert
- Replaced __P010 (although commented) with __PAGE_COPY on arm platform
- Replaced __P101 with PAGE_READONLY on um platform

Changes in V1:

https://lore.kernel.org/all/1644805853-21338-1-git-send-email-anshuman.khand...@arm.com/

- Add white spaces around the | operators 
- Moved powerpc_vm_get_page_prot() near vm_get_page_prot() on powerpc
- Moved arm64_vm_get_page_prot() near vm_get_page_prot() on arm64
- Moved sparc_vm_get_page_prot() near vm_get_page_prot() on sparc
- Compacted vm_get_page_prot() switch cases on all platforms
-  _PAGE_CACHE040 inclusion is dependent on CPU_IS_040_OR_060
- VM_SHARED case should return PAGE_NONE (not PAGE_COPY) on SH platform
- Reorganized VM_SHARED, VM_EXEC, VM_WRITE, VM_READ
- Dropped the last patch [RFC V1 31/31] which added macros for vm_flags 
combinations
  
https://lore.kernel.org/all/1643029028-12710-32-git-send-email-anshuman.khand...@arm.com/

Changes in RFC:

https://lore.kernel.org/all/1643029028-12710-1-git-send-email-anshuman.khand...@arm.com/


Anshuman Khandual (6):
  mm/mmap: Add new config ARCH_HAS_VM_GET_PAGE_PROT
  powerpc/mm: Enable ARCH_HAS_VM_GET_PAGE_PROT
  arm64/mm: Enable ARCH_HAS_VM_GET_PAGE_PROT
  sparc/mm: Enable ARCH_HAS_VM_GET_PAGE_PROT
  mm/mmap: Drop arch_filter_pgprot()
  mm/mmap: Drop arch_vm_get_page_prot()

Christoph Hellwig (1):
  x86/mm: Enable ARCH_HAS_VM_GET_PAGE_PROT

 arch/arm64/Kconfig |  1 +
 arch/arm64/include/asm/mman.h  | 24 
 arch/arm64/mm/mmap.c   | 25 +
 arch/powerpc/Kconfig   |  1 +
 arch/powerpc/include/asm/mman.h| 12 --
 

[PATCH AUTOSEL 5.4 17/21] powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit

2022-04-11 Thread Sasha Levin
From: Kefeng Wang 

[ Upstream commit ffa0b64e3be58519ae472ea29a1a1ad681e32f48 ]

mpe: On 64-bit Book3E vmalloc space starts at 0x8000.

Because of the way __pa() works we have:
  __pa(0x8000) == 0, and therefore
  virt_to_pfn(0x8000) == 0, and therefore
  virt_addr_valid(0x8000) == true

Which is wrong, virt_addr_valid() should be false for vmalloc space.
In fact all vmalloc addresses that alias with a valid PFN will return
true from virt_addr_valid(). That can cause bugs with hardened usercopy
as described below by Kefeng Wang:

  When running ethtool eth0 on 64-bit Book3E, a BUG occurred:

usercopy: Kernel memory exposure attempt detected from SLUB object not in 
SLUB page?! (offset 0, size 1048)!
kernel BUG at mm/usercopy.c:99
...
usercopy_abort+0x64/0xa0 (unreliable)
__check_heap_object+0x168/0x190
__check_object_size+0x1a0/0x200
dev_ethtool+0x2494/0x2b20
dev_ioctl+0x5d0/0x770
sock_do_ioctl+0xf0/0x1d0
sock_ioctl+0x3ec/0x5a0
__se_sys_ioctl+0xf0/0x160
system_call_exception+0xfc/0x1f0
system_call_common+0xf8/0x200

  The code shows below,

data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))

  The data is alloced by vmalloc(), virt_addr_valid(ptr) will return true
  on 64-bit Book3E, which leads to the panic.

  As commit 4dd7554a6456 ("powerpc/64: Add VIRTUAL_BUG_ON checks for __va
  and __pa addresses") does, make sure the virt addr above PAGE_OFFSET in
  the virt_addr_valid() for 64-bit, also add upper limit check to make
  sure the virt is below high_memory.

  Meanwhile, for 32-bit PAGE_OFFSET is the virtual address of the start
  of lowmem, high_memory is the upper low virtual address, the check is
  suitable for 32-bit, this will fix the issue mentioned in commit
  602946ec2f90 ("powerpc: Set max_mapnr correctly") too.

On 32-bit there is a similar problem with high memory, that was fixed in
commit 602946ec2f90 ("powerpc: Set max_mapnr correctly"), but that
commit breaks highmem and needs to be reverted.

We can't easily fix __pa(), we have code that relies on its current
behaviour. So for now add extra checks to virt_addr_valid().

For 64-bit Book3S the extra checks are not necessary, the combination of
virt_to_pfn() and pfn_valid() should yield the correct result, but they
are harmless.

Signed-off-by: Kefeng Wang 
Reviewed-by: Christophe Leroy 
[mpe: Add additional change log detail]
Signed-off-by: Michael Ellerman 
Link: https://lore.kernel.org/r/20220406145802.538416-1-...@ellerman.id.au
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/page.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 6ba5adb96a3b..0d8f9246ce15 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn)
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define pfn_to_kaddr(pfn)  __va((pfn) << PAGE_SHIFT)
 
-#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
+#define virt_addr_valid(vaddr) ({  \
+   unsigned long _addr = (unsigned long)vaddr; \
+   _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory &&   \
+   pfn_valid(virt_to_pfn(_addr));  \
+})
 
 /*
  * On Book-E parts we need __va to parse the device tree and we can't
-- 
2.35.1



[PATCH AUTOSEL 5.10 25/30] powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit

2022-04-11 Thread Sasha Levin
From: Kefeng Wang 

[ Upstream commit ffa0b64e3be58519ae472ea29a1a1ad681e32f48 ]

mpe: On 64-bit Book3E vmalloc space starts at 0x8000.

Because of the way __pa() works we have:
  __pa(0x8000) == 0, and therefore
  virt_to_pfn(0x8000) == 0, and therefore
  virt_addr_valid(0x8000) == true

Which is wrong, virt_addr_valid() should be false for vmalloc space.
In fact all vmalloc addresses that alias with a valid PFN will return
true from virt_addr_valid(). That can cause bugs with hardened usercopy
as described below by Kefeng Wang:

  When running ethtool eth0 on 64-bit Book3E, a BUG occurred:

usercopy: Kernel memory exposure attempt detected from SLUB object not in 
SLUB page?! (offset 0, size 1048)!
kernel BUG at mm/usercopy.c:99
...
usercopy_abort+0x64/0xa0 (unreliable)
__check_heap_object+0x168/0x190
__check_object_size+0x1a0/0x200
dev_ethtool+0x2494/0x2b20
dev_ioctl+0x5d0/0x770
sock_do_ioctl+0xf0/0x1d0
sock_ioctl+0x3ec/0x5a0
__se_sys_ioctl+0xf0/0x160
system_call_exception+0xfc/0x1f0
system_call_common+0xf8/0x200

  The code shows below,

data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))

  The data is alloced by vmalloc(), virt_addr_valid(ptr) will return true
  on 64-bit Book3E, which leads to the panic.

  As commit 4dd7554a6456 ("powerpc/64: Add VIRTUAL_BUG_ON checks for __va
  and __pa addresses") does, make sure the virt addr above PAGE_OFFSET in
  the virt_addr_valid() for 64-bit, also add upper limit check to make
  sure the virt is below high_memory.

  Meanwhile, for 32-bit PAGE_OFFSET is the virtual address of the start
  of lowmem, high_memory is the upper low virtual address, the check is
  suitable for 32-bit, this will fix the issue mentioned in commit
  602946ec2f90 ("powerpc: Set max_mapnr correctly") too.

On 32-bit there is a similar problem with high memory, that was fixed in
commit 602946ec2f90 ("powerpc: Set max_mapnr correctly"), but that
commit breaks highmem and needs to be reverted.

We can't easily fix __pa(), we have code that relies on its current
behaviour. So for now add extra checks to virt_addr_valid().

For 64-bit Book3S the extra checks are not necessary, the combination of
virt_to_pfn() and pfn_valid() should yield the correct result, but they
are harmless.

Signed-off-by: Kefeng Wang 
Reviewed-by: Christophe Leroy 
[mpe: Add additional change log detail]
Signed-off-by: Michael Ellerman 
Link: https://lore.kernel.org/r/20220406145802.538416-1-...@ellerman.id.au
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/page.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 254687258f42..f2c5c26869f1 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn)
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define pfn_to_kaddr(pfn)  __va((pfn) << PAGE_SHIFT)
 
-#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
+#define virt_addr_valid(vaddr) ({  \
+   unsigned long _addr = (unsigned long)vaddr; \
+   _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory &&   \
+   pfn_valid(virt_to_pfn(_addr));  \
+})
 
 /*
  * On Book-E parts we need __va to parse the device tree and we can't
-- 
2.35.1



[PATCH AUTOSEL 5.15 33/41] powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit

2022-04-11 Thread Sasha Levin
From: Kefeng Wang 

[ Upstream commit ffa0b64e3be58519ae472ea29a1a1ad681e32f48 ]

mpe: On 64-bit Book3E vmalloc space starts at 0x8000.

Because of the way __pa() works we have:
  __pa(0x8000) == 0, and therefore
  virt_to_pfn(0x8000) == 0, and therefore
  virt_addr_valid(0x8000) == true

Which is wrong, virt_addr_valid() should be false for vmalloc space.
In fact all vmalloc addresses that alias with a valid PFN will return
true from virt_addr_valid(). That can cause bugs with hardened usercopy
as described below by Kefeng Wang:

  When running ethtool eth0 on 64-bit Book3E, a BUG occurred:

usercopy: Kernel memory exposure attempt detected from SLUB object not in 
SLUB page?! (offset 0, size 1048)!
kernel BUG at mm/usercopy.c:99
...
usercopy_abort+0x64/0xa0 (unreliable)
__check_heap_object+0x168/0x190
__check_object_size+0x1a0/0x200
dev_ethtool+0x2494/0x2b20
dev_ioctl+0x5d0/0x770
sock_do_ioctl+0xf0/0x1d0
sock_ioctl+0x3ec/0x5a0
__se_sys_ioctl+0xf0/0x160
system_call_exception+0xfc/0x1f0
system_call_common+0xf8/0x200

  The code shows below,

data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))

  The data is alloced by vmalloc(), virt_addr_valid(ptr) will return true
  on 64-bit Book3E, which leads to the panic.

  As commit 4dd7554a6456 ("powerpc/64: Add VIRTUAL_BUG_ON checks for __va
  and __pa addresses") does, make sure the virt addr above PAGE_OFFSET in
  the virt_addr_valid() for 64-bit, also add upper limit check to make
  sure the virt is below high_memory.

  Meanwhile, for 32-bit PAGE_OFFSET is the virtual address of the start
  of lowmem, high_memory is the upper low virtual address, the check is
  suitable for 32-bit, this will fix the issue mentioned in commit
  602946ec2f90 ("powerpc: Set max_mapnr correctly") too.

On 32-bit there is a similar problem with high memory, that was fixed in
commit 602946ec2f90 ("powerpc: Set max_mapnr correctly"), but that
commit breaks highmem and needs to be reverted.

We can't easily fix __pa(), we have code that relies on its current
behaviour. So for now add extra checks to virt_addr_valid().

For 64-bit Book3S the extra checks are not necessary, the combination of
virt_to_pfn() and pfn_valid() should yield the correct result, but they
are harmless.

Signed-off-by: Kefeng Wang 
Reviewed-by: Christophe Leroy 
[mpe: Add additional change log detail]
Signed-off-by: Michael Ellerman 
Link: https://lore.kernel.org/r/20220406145802.538416-1-...@ellerman.id.au
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/page.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 254687258f42..f2c5c26869f1 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn)
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define pfn_to_kaddr(pfn)  __va((pfn) << PAGE_SHIFT)
 
-#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
+#define virt_addr_valid(vaddr) ({  \
+   unsigned long _addr = (unsigned long)vaddr; \
+   _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory &&   \
+   pfn_valid(virt_to_pfn(_addr));  \
+})
 
 /*
  * On Book-E parts we need __va to parse the device tree and we can't
-- 
2.35.1



[PATCH AUTOSEL 5.17 40/49] powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit

2022-04-11 Thread Sasha Levin
From: Kefeng Wang 

[ Upstream commit ffa0b64e3be58519ae472ea29a1a1ad681e32f48 ]

mpe: On 64-bit Book3E vmalloc space starts at 0x8000.

Because of the way __pa() works we have:
  __pa(0x8000) == 0, and therefore
  virt_to_pfn(0x8000) == 0, and therefore
  virt_addr_valid(0x8000) == true

Which is wrong, virt_addr_valid() should be false for vmalloc space.
In fact all vmalloc addresses that alias with a valid PFN will return
true from virt_addr_valid(). That can cause bugs with hardened usercopy
as described below by Kefeng Wang:

  When running ethtool eth0 on 64-bit Book3E, a BUG occurred:

usercopy: Kernel memory exposure attempt detected from SLUB object not in 
SLUB page?! (offset 0, size 1048)!
kernel BUG at mm/usercopy.c:99
...
usercopy_abort+0x64/0xa0 (unreliable)
__check_heap_object+0x168/0x190
__check_object_size+0x1a0/0x200
dev_ethtool+0x2494/0x2b20
dev_ioctl+0x5d0/0x770
sock_do_ioctl+0xf0/0x1d0
sock_ioctl+0x3ec/0x5a0
__se_sys_ioctl+0xf0/0x160
system_call_exception+0xfc/0x1f0
system_call_common+0xf8/0x200

  The code shows below,

data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))

  The data is alloced by vmalloc(), virt_addr_valid(ptr) will return true
  on 64-bit Book3E, which leads to the panic.

  As commit 4dd7554a6456 ("powerpc/64: Add VIRTUAL_BUG_ON checks for __va
  and __pa addresses") does, make sure the virt addr above PAGE_OFFSET in
  the virt_addr_valid() for 64-bit, also add upper limit check to make
  sure the virt is below high_memory.

  Meanwhile, for 32-bit PAGE_OFFSET is the virtual address of the start
  of lowmem, high_memory is the upper low virtual address, the check is
  suitable for 32-bit, this will fix the issue mentioned in commit
  602946ec2f90 ("powerpc: Set max_mapnr correctly") too.

On 32-bit there is a similar problem with high memory, that was fixed in
commit 602946ec2f90 ("powerpc: Set max_mapnr correctly"), but that
commit breaks highmem and needs to be reverted.

We can't easily fix __pa(), we have code that relies on its current
behaviour. So for now add extra checks to virt_addr_valid().

For 64-bit Book3S the extra checks are not necessary, the combination of
virt_to_pfn() and pfn_valid() should yield the correct result, but they
are harmless.

Signed-off-by: Kefeng Wang 
Reviewed-by: Christophe Leroy 
[mpe: Add additional change log detail]
Signed-off-by: Michael Ellerman 
Link: https://lore.kernel.org/r/20220406145802.538416-1-...@ellerman.id.au
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/page.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 254687258f42..f2c5c26869f1 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn)
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define pfn_to_kaddr(pfn)  __va((pfn) << PAGE_SHIFT)
 
-#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
+#define virt_addr_valid(vaddr) ({  \
+   unsigned long _addr = (unsigned long)vaddr; \
+   _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory &&   \
+   pfn_valid(virt_to_pfn(_addr));  \
+})
 
 /*
  * On Book-E parts we need __va to parse the device tree and we can't
-- 
2.35.1



[PATCH AUTOSEL 5.17 27/49] static_call: Properly initialise DEFINE_STATIC_CALL_RET0()

2022-04-11 Thread Sasha Levin
From: Christophe Leroy 

[ Upstream commit 5517d500829c683a358a8de04ecb2e28af629ae5 ]

When a static call is updated with __static_call_return0() as target,
arch_static_call_transform() sets it to use an optimised set of
instructions which are meant to lie in the same cacheline.

But when initialising a static call with DEFINE_STATIC_CALL_RET0(),
we get a branch to the real __static_call_return0() function instead
of getting the optimised setup:

c00d8120 <__SCT__perf_snapshot_branch_stack>:
c00d8120:   4b ff ff f4 b   c00d8114 <__static_call_return0>
c00d8124:   3d 80 c0 0e lis r12,-16370
c00d8128:   81 8c 81 3c lwz r12,-32452(r12)
c00d812c:   7d 89 03 a6 mtctr   r12
c00d8130:   4e 80 04 20 bctr
c00d8134:   38 60 00 00 li  r3,0
c00d8138:   4e 80 00 20 blr
c00d813c:   00 00 00 00 .long 0x0

Add ARCH_DEFINE_STATIC_CALL_RET0_TRAMP() defined by each architecture
to set up the optimised configuration, and rework
DEFINE_STATIC_CALL_RET0() to call it:

c00d8120 <__SCT__perf_snapshot_branch_stack>:
c00d8120:   48 00 00 14 b   c00d8134 
<__SCT__perf_snapshot_branch_stack+0x14>
c00d8124:   3d 80 c0 0e lis r12,-16370
c00d8128:   81 8c 81 3c lwz r12,-32452(r12)
c00d812c:   7d 89 03 a6 mtctr   r12
c00d8130:   4e 80 04 20 bctr
c00d8134:   38 60 00 00 li  r3,0
c00d8138:   4e 80 00 20 blr
c00d813c:   00 00 00 00 .long 0x0
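
For context, a minimal usage sketch of the macro being fixed; the hook name
and implementation below are hypothetical, not taken from this patch:

	#include <linux/static_call.h>

	static int real_hook(void)
	{
		return 42;
	}

	/* Starts out returning 0 via the optimised RET0 trampoline shown above */
	DEFINE_STATIC_CALL_RET0(my_hook, real_hook);

	static void example_caller(void)
	{
		int v = static_call(my_hook)();	/* returns 0 until updated */

		/* Patch in the real implementation at runtime */
		static_call_update(my_hook, real_hook);
		v = static_call(my_hook)();	/* now calls real_hook(), returns 42 */
	}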

Signed-off-by: Christophe Leroy 
Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Link: 
https://lore.kernel.org/r/1e0a61a88f52a460f62a58ffc2a5f847d1f7d9d8.1647253456.git.christophe.le...@csgroup.eu
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/static_call.h |  1 +
 arch/x86/include/asm/static_call.h |  2 ++
 include/linux/static_call.h| 20 +---
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/static_call.h 
b/arch/powerpc/include/asm/static_call.h
index 0a0bc79bd1fa..de1018cc522b 100644
--- a/arch/powerpc/include/asm/static_call.h
+++ b/arch/powerpc/include/asm/static_call.h
@@ -24,5 +24,6 @@
 
 #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)	__PPC_SCT(name, "b " #func)
 #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)	__PPC_SCT(name, "blr")
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)	__PPC_SCT(name, "b .+20")
 
 #endif /* _ASM_POWERPC_STATIC_CALL_H */
diff --git a/arch/x86/include/asm/static_call.h 
b/arch/x86/include/asm/static_call.h
index ed4f8bb6c2d9..2455d721503e 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -38,6 +38,8 @@
 #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)   \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
 
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)   \
+   ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
 
 #define ARCH_ADD_TRAMP_KEY(name)   \
asm(".pushsection .static_call_tramp_key, \"a\" \n" \
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 3e56a9751c06..e2d70435988c 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -196,6 +196,14 @@ extern long __static_call_return0(void);
};  \
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
 
+#define DEFINE_STATIC_CALL_RET0(name, _func)   \
+   DECLARE_STATIC_CALL(name, _func);   \
+   struct static_call_key STATIC_CALL_KEY(name) = {\
+   .func = __static_call_return0,  \
+   .type = 1,  \
+   };  \
+   ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
+
 #define static_call_cond(name) (void)__static_call(name)
 
 #define EXPORT_STATIC_CALL(name)   \
@@ -231,6 +239,12 @@ static inline int static_call_init(void) { return 0; }
};  \
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
 
+#define DEFINE_STATIC_CALL_RET0(name, _func)   \
+   DECLARE_STATIC_CALL(name, _func);   \
+   struct static_call_key STATIC_CALL_KEY(name) = {\
+   .func = __static_call_return0,  \
+   };  \
+   ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
 
 #define static_call_cond(name) (void)__static_call(name)
 
@@ -287,6 +301,9 @@ static 

[PATCH AUTOSEL 5.17 01/49] KVM: PPC: Book3S HV P9: Fix "lost kick" race

2022-04-11 Thread Sasha Levin
From: Nicholas Piggin 

[ Upstream commit c7fa848ff01dad9ed3146a6b1a7d3622131bcedd ]

When new work is created that requires attention from the hypervisor
(e.g., to inject an interrupt into the guest), fast_vcpu_kick is used to
pull the target vcpu out of the guest if it may have been running.

Therefore the work creation side looks like this:

  vcpu->arch.doorbell_request = 1;
  kvmppc_fast_vcpu_kick_hv(vcpu) {
smp_mb();
cpu = vcpu->cpu;
if (cpu != -1)
send_ipi(cpu);
  }

And the guest entry side *should* look like this:

  vcpu->cpu = smp_processor_id();
  smp_mb();
  if (vcpu->arch.doorbell_request) {
// do something (abort entry or inject doorbell etc)
  }

But currently the store and load are flipped, so it is possible for the
entry to see no doorbell pending, and the doorbell creation misses the
store to set cpu, resulting lost work (or at least delayed until the
next guest exit).

Fix this by reordering the entry operations and adding a smp_mb
between them. The P8 path appears to have a similar race which is
commented but not addressed yet.

Signed-off-by: Nicholas Piggin 
Signed-off-by: Michael Ellerman 
Link: https://lore.kernel.org/r/20220303053315.1056880-2-npig...@gmail.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/kvm/book3s_hv.c | 41 +---
 1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 791db769080d..316f61a4cb59 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -225,6 +225,13 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
int cpu;
struct rcuwait *waitp;
 
+   /*
+* rcuwait_wake_up contains smp_mb() which orders prior stores that
+* create pending work vs below loads of cpu fields. The other side
+* is the barrier in vcpu run that orders setting the cpu fields vs
+* testing for pending work.
+*/
+
waitp = kvm_arch_vcpu_get_wait(vcpu);
if (rcuwait_wake_up(waitp))
++vcpu->stat.generic.halt_wakeup;
@@ -1089,7 +1096,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
}
tvcpu->arch.prodded = 1;
-   smp_mb();
+   smp_mb(); /* This orders prodded store vs ceded load */
if (tvcpu->arch.ceded)
kvmppc_fast_vcpu_kick_hv(tvcpu);
break;
@@ -3771,6 +3778,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore 
*vc)
pvc = core_info.vc[sub];
pvc->pcpu = pcpu + thr;
for_each_runnable_thread(i, vcpu, pvc) {
+   /*
+* XXX: is kvmppc_start_thread called too late here?
+* It updates vcpu->cpu and vcpu->arch.thread_cpu
+* which are used by kvmppc_fast_vcpu_kick_hv(), but
+* kick is called after new exceptions become available
+* and exceptions are checked earlier than here, by
+* kvmppc_core_prepare_to_enter.
+*/
kvmppc_start_thread(vcpu, pvc);
kvmppc_create_dtl_entry(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
@@ -4492,6 +4507,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
if (need_resched() || !kvm->arch.mmu_ready)
goto out;
 
+   vcpu->cpu = pcpu;
+   vcpu->arch.thread_cpu = pcpu;
+   vc->pcpu = pcpu;
+   local_paca->kvm_hstate.kvm_vcpu = vcpu;
+   local_paca->kvm_hstate.ptid = 0;
+   local_paca->kvm_hstate.fake_suspend = 0;
+
+   /*
+* Orders set cpu/thread_cpu vs testing for pending interrupts and
+* doorbells below. The other side is when these fields are set vs
+* kvmppc_fast_vcpu_kick_hv reading the cpu/thread_cpu fields to
+* kick a vCPU to notice the pending interrupt.
+*/
+   smp_mb();
+
if (!nested) {
kvmppc_core_prepare_to_enter(vcpu);
if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
@@ -4511,13 +4541,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
 
tb = mftb();
 
-   vcpu->cpu = pcpu;
-   vcpu->arch.thread_cpu = pcpu;
-   vc->pcpu = pcpu;
-   local_paca->kvm_hstate.kvm_vcpu = vcpu;
-   local_paca->kvm_hstate.ptid = 0;
-   local_paca->kvm_hstate.fake_suspend = 0;
-
__kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
 
trace_kvm_guest_enter(vcpu);
@@ -4619,6 +4642,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
  out:
+   vcpu->cpu = -1;
+   vcpu->arch.thread_cpu = -1;
powerpc_local_irq_pmu_restore(flags);
preempt_enable();
  

Re: [PATCH] powerpc/numa: Associate numa node to its cpu earlier

2022-04-11 Thread Michael Ellerman
Oscar Salvador  writes:
> On Mon, Apr 11, 2022 at 02:28:08PM +0530, Srikar Dronamraju wrote:
>> Given that my patch got accepted into powerpc tree
>> https://git.kernel.org/powerpc/c/e4ff77598a109bd36789ad5e80aba66fc53d0ffb
>> is now part of Linus tree, this line may need a slight tweak.
>
> Right.
>
> @Michael: Will you resolve the conflict, or you would rather want me to send
> v2 with the amendment?

I can resolve the conflict, it should be trivial. If there's any trouble
I'll let you know.

cheers


Re: [PATCH v2 4/4] tools/perf: Fix perf bench numa testcase to check if CPU used to bind task is online

2022-04-11 Thread Athira Rajeev



> On 09-Apr-2022, at 8:50 PM, Arnaldo Carvalho de Melo  wrote:
> 
> Em Wed, Apr 06, 2022 at 11:21:13PM +0530, Athira Rajeev escreveu:
>> Perf numa bench test fails with error:
>> 
>> Testcase:
>> ./perf bench numa mem -p 2 -t 1 -P 1024 -C 0,8 -M 1,0 -s 20 -zZq
>> --thp  1 --no-data_rand_walk
>> 
>> Failure snippet:
>> <<>>
>> Running 'numa/mem' benchmark:
>> 
>> # Running main, "perf bench numa numa-mem -p 2 -t 1 -P 1024 -C 0,8
>> -M 1,0 -s 20 -zZq --thp 1 --no-data_rand_walk"
>> 
>> perf: bench/numa.c:333: bind_to_cpumask: Assertion `!(ret)' failed.
>> <<>>
>> 
>> The Testcases uses CPU’s 0 and 8. In function "parse_setup_cpu_list",
>> There is check to see if cpu number is greater than max cpu’s possible
>> in the system ie via "if (bind_cpu_0 >= g->p.nr_cpus ||
>> bind_cpu_1 >= g->p.nr_cpus) {". But it could happen that system has
>> say 48 CPU’s, but only number of online CPU’s is 0-7. Other CPU’s
>> are offlined. Since "g->p.nr_cpus" is 48, so function will go ahead
>> and set bit for CPU 8 also in cpumask ( td->bind_cpumask).
>> 
>> bind_to_cpumask function is called to set affinity using
>> sched_setaffinity and the cpumask. Since the CPU8 is not present,
>> set affinity will fail here with EINVAL. Fix this issue by adding a
>> check to make sure that, CPU’s provided in the input argument values
>> are online before proceeding further and skip the test. For this,
>> include new helper function "is_cpu_online" in
>> "tools/perf/util/header.c".
>> 
>> Since "BIT(x)" definition will get included from header.h, remove
>> that from bench/numa.c
>> 
>> Tested-by: Disha Goel 
>> Signed-off-by: Athira Rajeev 
>> Reported-by: Disha Goel 
>> ---
>> tools/perf/bench/numa.c  |  8 ++--
>> tools/perf/util/header.c | 43 
>> tools/perf/util/header.h |  1 +
>> 3 files changed, 50 insertions(+), 2 deletions(-)
>> 
>> diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
>> index 29e41e32bd88..7992d79b3e41 100644
>> --- a/tools/perf/bench/numa.c
>> +++ b/tools/perf/bench/numa.c
>> @@ -34,6 +34,7 @@
>> #include 
>> #include 
>> 
>> +#include "../util/header.h"
>> #include 
>> #include 
>> 
>> @@ -616,6 +617,11 @@ static int parse_setup_cpu_list(void)
>>  return -1;
>>  }
>> 
>> +if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) 
>> != 1) {
>> +printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 
>> is offline\n");
>> +return -1;
>> +}
>> +
>>  BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
>>  BUG_ON(bind_cpu_0 > bind_cpu_1);
>> 
>> @@ -786,8 +792,6 @@ static int parse_nodes_opt(const struct option *opt 
>> __maybe_unused,
>>  return parse_node_list(arg);
>> }
>> 
>> -#define BIT(x) (1ul << x)
>> -
>> static inline uint32_t lfsr_32(uint32_t lfsr)
>> {
>>  const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
>> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
>> index 6da12e522edc..3f5fcf5d4b3f 100644
>> --- a/tools/perf/util/header.c
>> +++ b/tools/perf/util/header.c
>> @@ -983,6 +983,49 @@ static int write_dir_format(struct feat_fd *ff,
>>  return do_write(ff, &data->dir.version, sizeof(data->dir.version));
>> }
>> 
>> +#define SYSFS "/sys/devices/system/cpu/"
> 
> Please use
> 
> int sysfs__read_str(const char *entry, char **buf, size_t *sizep)

Hi Arnaldo,

Sure, I will send a V3 for this separately which uses “sysfs__read_str”

Thanks for the review
Athira
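
(For reference, a rough sketch of what such a rework could look like, using
sysfs__read_str() from tools/lib/api/fs/fs.h; this is illustrative only and
not the actual V3 patch:)

	#include <api/fs/fs.h>
	#include <limits.h>
	#include <stdio.h>
	#include <stdlib.h>

	/*
	 * Returns 1 if the CPU is online, 0 if offline.  Kernels without
	 * CONFIG_HOTPLUG_CPU do not expose cpuN/online, in which case the
	 * CPU is treated as online (a fuller version would also verify
	 * that the cpuN directory itself exists).
	 */
	int is_cpu_online(unsigned int cpu)
	{
		char path[PATH_MAX];
		char *str = NULL;
		size_t len;
		int ret;

		snprintf(path, sizeof(path), "devices/system/cpu/cpu%u/online", cpu);

		if (sysfs__read_str(path, &str, &len) < 0)
			return 1;

		ret = (len > 0 && str[0] == '1') ? 1 : 0;
		free(str);
		return ret;
	}
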
> 
> See how to use it in the smt_on() function at tools/perf/util/smt.c, for
> example.
> 
> Now looking at the first patches in the series.
> 
> - Arnaldo
> 
>> +/*
>> + * Check whether a CPU is online
>> + *
>> + * Returns:
>> + * 1 -> if CPU is online
>> + * 0 -> if CPU is offline
>> + *-1 -> error case
>> + */
>> +int is_cpu_online(unsigned int cpu)
>> +{
>> +char sysfs_cpu[255];
>> +char buf[255];
>> +struct stat statbuf;
>> +size_t len;
>> +int fd;
>> +
>> +snprintf(sysfs_cpu, sizeof(sysfs_cpu), SYSFS "cpu%u", cpu);
>> +
>> +if (stat(sysfs_cpu, &statbuf) != 0)
>> +return 0;
>> +
>> +/*
>> + * Check if /sys/devices/system/cpu/cpux/online file
>> + * exists. In kernels without CONFIG_HOTPLUG_CPU, this
>> + * file won't exist.
>> + */
>> +snprintf(sysfs_cpu, sizeof(sysfs_cpu), SYSFS "cpu%u/online", cpu);
>> +if (stat(sysfs_cpu, &statbuf) != 0)
>> +return 1;
>> +
>> +fd = open(sysfs_cpu, O_RDONLY);
>> +if (fd == -1)
>> +return -1;
>> +
>> +len = read(fd, buf, sizeof(buf) - 1);
>> +buf[len] = '\0';
>> +close(fd);
>> +
>> +return strtoul(buf, NULL, 16);
>> +}
>> +
>> #ifdef HAVE_LIBBPF_SUPPORT
>> static int write_bpf_prog_info(struct feat_fd *ff,
>> struct evlist *evlist __maybe_unused)
>> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
>> index c9e3265832d9..0eb4bc29a5a4 100644
>> --- 

Re: [PATCH] powerpc/numa: Associate numa node to its cpu earlier

2022-04-11 Thread Oscar Salvador
On Mon, Apr 11, 2022 at 02:28:08PM +0530, Srikar Dronamraju wrote:
> Given that my patch got accepted into powerpc tree
> https://git.kernel.org/powerpc/c/e4ff77598a109bd36789ad5e80aba66fc53d0ffb
> is now part of Linus tree, this line may need a slight tweak.

Right.

@Michael: Will you resolve the conflict, or you would rather want me to send
v2 with the amendment?

-- 
Oscar Salvador
SUSE Labs


Re: False positive kmemleak report for dtb properties names on powerpc

2022-04-11 Thread Christophe Leroy
Hi Ariel

On 09/04/2022 at 15:47, Ariel Marcovitch wrote:
> Hi Christophe, did you get the chance to look at this?

I tested something this morning, it works for me, see below

> 
> On 23/03/2022 21:06, Mike Rapoport wrote:
>> Hi Catalin,
>>
>> On Wed, Mar 23, 2022 at 05:22:38PM +, Catalin Marinas wrote:
>>> Hi Ariel,
>>>
>>> On Fri, Feb 18, 2022 at 09:45:51PM +0200, Ariel Marcovitch wrote:
 I was running a powerpc 32bit kernel (built using
 qemu_ppc_mpc8544ds_defconfig
 buildroot config, with enabling DEBUGFS+KMEMLEAK+HIGHMEM in the kernel
 config)

...

 I don't suppose I can just shuffle the calls in setup_arch() around, 
 so I
 wanted to hear your opinions first
>>> I think it's better if we change the logic than shuffling the calls.
>>> IIUC MEMBLOCK_ALLOC_ACCESSIBLE means that __va() works on the phys
>>> address return by memblock, so something like below (untested):
>> MEMBLOCK_ALLOC_ACCESSIBLE means "anywhere", see commit e63075a3c937
>> ("memblock: Introduce default allocation limit and use it to replace
>> explicit ones"), so it won't help to detect high memory.
>>
>> If I remember correctly, ppc initializes memblock *very* early, so 
>> setting
>> max_low_pfn along with lowmem_end_addr in
>> arch/powerpc/mm/init_32::MMU_init() makes sense to me.
>>
>> Maybe ppc folks have other ideas...
>> I've added Christophe who works on ppc32 these days.

I think memblock is already available at the end of MMU_init() on PPC32 
and at the end of early_setup() on PPC64. It means it is ready when we 
enter setup_arch().

I tested the change below, it works for me, I don't get any kmemleak 
report anymore.

diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 518ae5aa9410..9f4e50b176c9 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -840,6 +840,9 @@ void __init setup_arch(char **cmdline_p)
/* Set a half-reasonable default so udelay does something sensible */
loops_per_jiffy = 5 / HZ;

+   /* Parse memory topology */
+   mem_topology_setup();
+
/* Unflatten the device-tree passed by prom_init or kexec */
unflatten_device_tree();

@@ -882,9 +885,6 @@ void __init setup_arch(char **cmdline_p)
/* Check the SMT related command line arguments (ppc64). */
check_smt_enabled();

-   /* Parse memory topology */
-   mem_topology_setup();
-
/*
 * Release secondary cpus out of their spinloops at 0x60 now that
 * we can map physical -> logical CPU ids.
---


Christophe

Re: [PATCH] powerpc/numa: Associate numa node to its cpu earlier

2022-04-11 Thread Srikar Dronamraju
* Oscar Salvador  [2022-04-11 09:49:34]:

> powerpc is the only platform that does not rely on
> cpu_up()->try_online_node() to bring up a numa node,
> and special cases it, instead, deep in its own machinery:
> 
> dlpar_online_cpu
>  find_and_online_cpu_nid
>   try_online_node
> 
> This should not be needed, but the thing is that the try_online_node()
> from cpu_up() will not apply on the right node, because cpu_to_node()
> will return the old mapping numa<->cpu that gets set on boot stage
> for all possible cpus.
> 
> That can be seen easily if we try to print out the numa node passed
> to try_online_node() in cpu_up().
> 
> The thing is that the numa<->cpu mapping does not get updated till a much
> later stage in start_secondary:
> 
> start_secondary:
>  set_numa_node(numa_cpu_lookup_table[cpu])
> 
> But we do not really care, as we already know the
> CPU <-> NUMA associativity back in find_and_online_cpu_nid(),
> so let us make use of that and set the proper numa<->cpu mapping,
> so cpu_to_node() in cpu_up() returns the right node and
> try_online_node() can do its work.
> 
> Signed-off-by: Oscar Salvador 
> Reviewed-by: Srikar Dronamraju 
> Tested-by: Geetika Moolchandani 
> ---
>  arch/powerpc/include/asm/topology.h  |  8 ++-
>  arch/powerpc/mm/numa.c   | 31 
> +++-
>  arch/powerpc/platforms/pseries/hotplug-cpu.c |  2 +-
>  3 files changed, 11 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/topology.h 
> b/arch/powerpc/include/asm/topology.h
> index 36fcafb1fd6d..6ae1b2dce83e 100644
> --- a/arch/powerpc/include/asm/topology.h
> +++ b/arch/powerpc/include/asm/topology.h
> @@ -111,14 +111,10 @@ static inline void unmap_cpu_from_node(unsigned long 
> cpu) {}
>  #endif /* CONFIG_NUMA */
> 
>  #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
> -extern int find_and_online_cpu_nid(int cpu);
> +extern void find_and_update_cpu_nid(int cpu);
>  extern int cpu_to_coregroup_id(int cpu);
>  #else
> -static inline int find_and_online_cpu_nid(int cpu)
> -{
> - return 0;
> -}
> -
> +static inline void find_and_update_cpu_nid(int cpu) {}
>  static inline int cpu_to_coregroup_id(int cpu)
>  {
>  #ifdef CONFIG_SMP
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index b9b7fefbb64b..b5bc8b1a833d 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -1423,43 +1423,28 @@ static long vphn_get_associativity(unsigned long cpu,
>   return rc;
>  }
> 
> -int find_and_online_cpu_nid(int cpu)
> +void find_and_update_cpu_nid(int cpu)
>  {
>   __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
>   int new_nid;
> 
>   /* Use associativity from first thread for all siblings */
>   if (vphn_get_associativity(cpu, associativity))
> - return cpu_to_node(cpu);
> + return;
> 
> + /* Do not have previous associativity, so find it now. */
>   new_nid = associativity_to_nid(associativity);
> +
>   if (new_nid < 0 || !node_possible(new_nid))
>   new_nid = first_online_node;
> -
> - if (NODE_DATA(new_nid) == NULL) {

Given that my patch got accepted into powerpc tree
https://git.kernel.org/powerpc/c/e4ff77598a109bd36789ad5e80aba66fc53d0ffb
is now part of Linus tree, this line may need a slight tweak.

> -#ifdef CONFIG_MEMORY_HOTPLUG
> - /*
> -  * Need to ensure that NODE_DATA is initialized for a node from
> -  * available memory (see memblock_alloc_try_nid). If unable to
> -  * init the node, then default to nearest node that has memory
> -  * installed. Skip onlining a node if the subsystems are not
> -  * yet initialized.
> -  */
> - if (!topology_inited || try_online_node(new_nid))
> - new_nid = first_online_node;
> -#else
> - /*
> -  * Default to using the nearest node that has memory installed.
> -  * Otherwise, it would be necessary to patch the kernel MM code
> -  * to deal with more memoryless-node error conditions.
> + else
> + /* Associate node <-> cpu, so cpu_up() calls
> +  * try_online_node() on the right node.
>*/
> - new_nid = first_online_node;
> -#endif
> - }
> + set_cpu_numa_node(cpu, new_nid);
> 
>   pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__,
>   cpu, new_nid);
> - return new_nid;
>  }
> 
>  int cpu_to_coregroup_id(int cpu)
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index b81fc846d99c..0f8cd8b06432 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -398,7 +398,7 @@ static int dlpar_online_cpu(struct device_node *dn)
>   if (get_hard_smp_processor_id(cpu) != thread)
>   continue;
>  

[RFC v4 PATCH 5/5] powerpc/crash hp: add crash hotplug support for kexec_load

2022-04-11 Thread Sourabh Jain
The kernel changes needed for crash hotplug support for the kexec_load system
call are similar to those for kexec_file_load (already implemented in earlier
patches), except for finding the index of the FDT segment in the kexec
segment array. Since the kexec segment array is prepared by the kexec tool in
userspace, the kernel does not know at which index the FDT segment is
present.

Now to enable crash hotplug support for the kexec_load case, the crash
hotplug handler is updated to identify the index at which the FDT segment
is present in the kexec segment array by comparing the first 32 bits of
every kexec segment with the FDT magic number.

Signed-off-by: Sourabh Jain 
---
 arch/powerpc/kexec/core_64.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 62f77cc86407..e3f224f8eb3a 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -480,7 +480,9 @@ int update_cpus_node(void *fdt)
 void arch_crash_hotplug_handler(struct kimage *image, unsigned int hp_action,
unsigned long a, unsigned long b)
 {
-   void *fdt;
+   void *fdt, *ptr;
+   unsigned int n;
+   unsigned long mem, memsz;
 
/* No action needed for CPU hot-unplug */
if (hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
@@ -492,6 +494,24 @@ void arch_crash_hotplug_handler(struct kimage *image, 
unsigned int hp_action,
return;
}
 
+   /* Since kexec segments for the kexec_load system call are prepared
+* by the kexec tool in userspace, we need to loop through all the
+* segments to find the index of the FDT segment. In case of
+* kexec_file_load it is discovered during the load itself.
+*/
+   if (!image->arch.fdt_index_valid) {
+   for (n = 0; n < image->nr_segments; n++) {
+   mem = image->segment[n].mem;
+   memsz = image->segment[n].memsz;
+   ptr = __va(mem);
+   if (ptr && fdt_magic(ptr) == FDT_MAGIC) {
+   image->arch.fdt_index = n;
+   image->arch.fdt_index_valid = true;
+   break;
+   }
+   }
+   }
+
/* Must have valid FDT index */
if (!image->arch.fdt_index_valid) {
pr_err("crash hp: unable to locate FDT segment");
-- 
2.35.1



[RFC v4 PATCH 0/5] In kernel handling of CPU hotplug events for crash kernel

2022-04-11 Thread Sourabh Jain
This patch series implements, on PowerPC, the crash hotplug handler introduced
by the https://lkml.org/lkml/2022/3/3/674 patch series.


The Problem:

After hotplug/DLPAR events the capture kernel holds stale information about the
system. Dump collection with a stale capture kernel might end up in dump capture
failure or an inaccurate dump.


Existing solution:
==
The existing solution to keep the capture kernel up-to-date is to observe the
hotplug event via a udev rule and trigger a full capture kernel reload after
each hotplug event.

Shortcomings:

- Leaves a window where a kernel crash might not lead to a successful dump
  collection.
- Reloading all kexec components for each hotplug event is inefficient. Since
  only one or two kexec components need to be updated after a hotplug event,
  reloading all of them is redundant.
- udev rules are prone to races if hotplug events are frequent.

More about issues with an existing solution is posted here:
 - https://lkml.org/lkml/2020/12/14/532
 - https://lists.ozlabs.org/pipermail/linuxppc-dev/2022-February/240254.html

Proposed Solution:
==
Instead of reloading all kexec segments on a hotplug event, this patch series
focuses on updating only the relevant kexec segment. Once the kexec segments
are loaded in the kernel reserved area, an arch-specific hotplug handler
updates the relevant kexec segment based on the hotplug event type.

As mentioned above, this patch series implements a PowerPC crash hotplug
handler for CPUs. Memory hotplug support in the crash hotplug handler is on
our TODO list.


A couple of minor changes are required to realize the benefit of the patch
series:

- disable the udev rule:

  comment out the below line in the kdump udev rule file:
  RHEL: /usr/lib/udev/rules.d/98-kexec.rules
  # SUBSYSTEM=="cpu", ACTION=="online", GOTO="kdump_reload_cpu"

- the kexec tool needs to be updated with the patch below for the kexec_load
  system call to work (not needed if the -s option is used during kexec panic
  load):

---
diff --git a/kexec/arch/ppc64/kexec-elf-ppc64.c 
b/kexec/arch/ppc64/kexec-elf-ppc64.c
index 695b8b0..1dc6490 100644
--- a/kexec/arch/ppc64/kexec-elf-ppc64.c
+++ b/kexec/arch/ppc64/kexec-elf-ppc64.c
@@ -45,6 +45,29 @@ uint64_t initrd_base, initrd_size;
 unsigned char reuse_initrd = 0;
 const char *ramdisk;
 
+#define MAX_CORE 256
+#define PER_CORE_NODE_SIZE 1500
+
+/**
+ * get_crash_fdt_mem_sz() - calculate mem size for crash kernel FDT
+ * @fdt: pointer to crash kernel FDT
+ *
+ * Calculate the buffer space needed to add more CPU nodes in FDT after
+ * capture kernel load due to hot-add events.
+ *
+ * Some assumptions are made to calculate the additional buffer size needed
+ * to accommodate future hot-added CPUs in the crash FDT: the maximum core
+ * count in the system would not go beyond MAX_CORE, and the memory needed to
+ * store per-core data in the FDT is PER_CORE_NODE_SIZE.
+ *
+ * Certainly MAX_CORE can be replaced with the possible core count, and
+ * PER_CORE_NODE_SIZE with some standard value instead of a per-core node
+ * size observed on a Power9 LPAR.
+ */
+static unsigned int get_crash_fdt_mem_sz(void *fdt) {
+   return fdt_totalsize(fdt) + (PER_CORE_NODE_SIZE * MAX_CORE);
+}
+
 int elf_ppc64_probe(const char *buf, off_t len)
 {
struct mem_ehdr ehdr;
@@ -179,6 +202,7 @@ int elf_ppc64_load(int argc, char **argv, const char *buf, 
off_t len,
uint64_t max_addr, hole_addr;
char *seg_buf = NULL;
off_t seg_size = 0;
+   unsigned int mem_sz = 0;
struct mem_phdr *phdr;
size_t size;
 #ifdef NEED_RESERVE_DTB
@@ -329,7 +353,13 @@ int elf_ppc64_load(int argc, char **argv, const char *buf, 
off_t len,
if (result < 0)
return result;
 
-   my_dt_offset = add_buffer(info, seg_buf, seg_size, seg_size,
+   if (info->kexec_flags & KEXEC_ON_CRASH) {
+   mem_sz = get_crash_fdt_mem_sz((void *)seg_buf);
+   fdt_set_totalsize(seg_buf, mem_sz);
+   info->fdt_index = info->nr_segments;
+   }
+
+   my_dt_offset = add_buffer(info, seg_buf, seg_size, mem_sz,
0, 0, max_addr, -1);
 
 #ifdef NEED_RESERVE_DTB
diff --git a/kexec/kexec.c b/kexec/kexec.c
index f63b36b..846b1a8 100644
--- a/kexec/kexec.c
+++ b/kexec/kexec.c
@@ -672,6 +672,9 @@ static void update_purgatory(struct kexec_info *info)
if (info->segment[i].mem == (void *)info->rhdr.rel_addr) {
continue;
}
+   if (info->fdt_index == i)
+   continue;
+
sha256_update(, info->segment[i].buf,
  info->segment[i].bufsz);
nullsz = info->segment[i].memsz - info->segment[i].bufsz;
diff --git a/kexec/kexec.h b/kexec/kexec.h
index 595dd68..0906a1b 100644
--- a/kexec/kexec.h
+++ b/kexec/kexec.h
@@ -169,6 +169,7 @@ struct kexec_info {
int command_line_len;
 
  

[RFC v4 PATCH 1/5] powerpc/kexec: make update_cpus_node non-static

2022-04-11 Thread Sourabh Jain
Make the update_cpus_node function non-static and export it for
usage in other kexec components.

The update_cpus_node definition is moved to core_64.c so that it
can be used with both kexec_load and kexec_file_load system calls.

Signed-off-by: Sourabh Jain 
---
 arch/powerpc/include/asm/kexec.h  |  1 +
 arch/powerpc/kexec/core_64.c  | 88 +++
 arch/powerpc/kexec/file_load_64.c | 87 --
 3 files changed, 89 insertions(+), 87 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 8ebdd23d987c..e1288826e22e 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -127,6 +127,7 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage 
*image);
 int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
unsigned long initrd_load_addr,
unsigned long initrd_len, const char *cmdline);
+int update_cpus_node(void *fdt);
 #endif /* CONFIG_PPC64 */
 
 #endif /* CONFIG_KEXEC_FILE */
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 635b5fc30b53..249d2632526d 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -378,6 +379,93 @@ void default_machine_kexec(struct kimage *image)
/* NOTREACHED */
 }
 
+/**
+ * add_node_props - Reads node properties from device node structure and add
+ *  them to fdt.
+ * @fdt:Flattened device tree of the kernel
+ * @node_offset:offset of the node to add a property at
+ * @dn: device node pointer
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_node_props(void *fdt, int node_offset, const struct device_node 
*dn)
+{
+   int ret = 0;
+   struct property *pp;
+
+   if (!dn)
+   return -EINVAL;
+
+   for_each_property_of_node(dn, pp) {
+   ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, 
pp->length);
+   if (ret < 0) {
+   pr_err("Unable to add %s property: %s\n", pp->name, 
fdt_strerror(ret));
+   return ret;
+   }
+   }
+   return ret;
+}
+
+/**
+ * update_cpus_node - Update cpus node of flattened device tree using of_root
+ *device node.
+ * @fdt:  Flattened device tree of the kernel.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int update_cpus_node(void *fdt)
+{
+   struct device_node *cpus_node, *dn;
+   int cpus_offset, cpus_subnode_offset, ret = 0;
+
+   cpus_offset = fdt_path_offset(fdt, "/cpus");
+   if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
+   pr_err("Malformed device tree: error reading /cpus node: %s\n",
+  fdt_strerror(cpus_offset));
+   return cpus_offset;
+   }
+
+   if (cpus_offset > 0) {
+   ret = fdt_del_node(fdt, cpus_offset);
+   if (ret < 0) {
+   pr_err("Error deleting /cpus node: %s\n", 
fdt_strerror(ret));
+   return -EINVAL;
+   }
+   }
+
+   /* Add cpus node to fdt */
+   cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
+   if (cpus_offset < 0) {
+   pr_err("Error creating /cpus node: %s\n", 
fdt_strerror(cpus_offset));
+   return -EINVAL;
+   }
+
+   /* Add cpus node properties */
+   cpus_node = of_find_node_by_path("/cpus");
+   ret = add_node_props(fdt, cpus_offset, cpus_node);
+   of_node_put(cpus_node);
+   if (ret < 0)
+   return ret;
+
+   /* Loop through all subnodes of cpus and add them to fdt */
+   for_each_node_by_type(dn, "cpu") {
+   cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, 
dn->full_name);
+   if (cpus_subnode_offset < 0) {
+   pr_err("Unable to add %s subnode: %s\n", dn->full_name,
+  fdt_strerror(cpus_subnode_offset));
+   ret = cpus_subnode_offset;
+   goto out;
+   }
+
+   ret = add_node_props(fdt, cpus_subnode_offset, dn);
+   if (ret < 0)
+   goto out;
+   }
+out:
+   of_node_put(dn);
+   return ret;
+}
+
 #ifdef CONFIG_PPC_64S_HASH_MMU
 /* Values we need to export to the second kernel via the device tree. */
 static unsigned long htab_base;
diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 07da6bf1cf24..57f991b0a9da 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -951,93 +951,6 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage 
*image)
return (unsigned int)(usm_entries * sizeof(u64));
 }
 
-/**
- * add_node_props - Reads node 

[RFC v4 PATCH 4/5] powerpc/crash hp: add crash hotplug support for kexec_file_load

2022-04-11 Thread Sourabh Jain
Two major changes are made to enable the crash CPU hotplug handler.
Firstly, the kexec load path is updated to prepare the kimage for hotplug
changes, and secondly, the crash hotplug handler is implemented.

On the kexec load path, the memsz allocation of the crash FDT segment
is updated to ensure that it has sufficient buffer space to accommodate
future hot-added CPUs, and the kimage members that track the kexec
FDT segment are initialized.

The crash hotplug handler updates the cpus node of the crash FDT. While
the crash FDT is being updated, kexec_crash_image is marked invalid and
restored after the FDT update to avoid a race.

Since crash update on memory hotplug is not supported yet, the crash
hotplug handler simply warns the user and returns.

Signed-off-by: Sourabh Jain 
---
 arch/powerpc/kexec/core_64.c | 46 ++
 arch/powerpc/kexec/elf_64.c  | 74 
 2 files changed, 120 insertions(+)

diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 249d2632526d..62f77cc86407 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -466,6 +466,52 @@ int update_cpus_node(void *fdt)
return ret;
 }
 
+#ifdef CONFIG_CRASH_HOTPLUG
+/**
+ * arch_crash_hotplug_handler() - Handle hotplug FDT changes
+ * @image: the active struct kimage
+ * @hp_action: the hot un/plug action being handled
+ * @a: first parameter dependent upon hp_action
+ * @b: second parameter dependent upon hp_action
+ *
+ * To accurately reflect CPU hot un/plug changes, the FDT
+ * must be updated with the new list of CPUs and memories.
+ */
+void arch_crash_hotplug_handler(struct kimage *image, unsigned int hp_action,
+   unsigned long a, unsigned long b)
+{
+   void *fdt;
+
+   /* No action needed for CPU hot-unplug */
+   if (hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
+   return;
+
+   /* crash update on memory hotplug is not supported yet */
+   if (hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY || hp_action == 
KEXEC_CRASH_HP_ADD_MEMORY) {
+   pr_info_once("crash hp: crash update is not supported with 
memory hotplug\n");
+   return;
+   }
+
+   /* Must have valid FDT index */
+   if (!image->arch.fdt_index_valid) {
+   pr_err("crash hp: unable to locate FDT segment");
+   return;
+   }
+
+   fdt = __va((void *)image->segment[image->arch.fdt_index].mem);
+
+   /* Temporarily invalidate the crash image while it is replaced */
+   xchg(_crash_image, NULL);
+
+   /* update FDT to reflect changes to CPU resources */
+   if (update_cpus_node(fdt))
+   pr_err("crash hp: failed to update crash FDT");
+
+   /* The crash image is now valid once again */
+   xchg(_crash_image, image);
+}
+#endif /* CONFIG_CRASH_HOTPLUG */
+
 #ifdef CONFIG_PPC_64S_HASH_MMU
 /* Values we need to export to the second kernel via the device tree. */
 static unsigned long htab_base;
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index eeb258002d1e..9dc774548ce4 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -24,6 +24,67 @@
 #include 
 #include 
 
+#include 
+#include 
+
+#ifdef CONFIG_CRASH_HOTPLUG
+
+/**
+ * get_cpu_node_sz() - Calculate the space needed to store a CPU device type
+ *                     node in FDT. The calculation is done based on the
+ *                     existing CPU node in the unflattened device tree. Loop
+ *                     through all the properties of the very first CPU type
+ *                     device node found in the unflattened device tree and
+ *                     return the sum of the property length and property
+ *                     string size of all properties of a CPU node.
+ */
+static int get_cpu_node_sz(void) {
+   struct device_node *dn = NULL;
+   struct property *pp;
+   int cpu_node_size = 0;
+
+   dn = of_find_node_by_type(NULL, "cpu");
+
+   if (!dn) {
+   pr_warn("Unable to locate cpu device_type node.\n");
+   goto out;
+   }
+
+   /* Every node in FDT starts with FDT_BEGIN_NODE and ends with
+* FDT_END_NODE that takes one byte each.
+*/
+   cpu_node_size = 2;
+
+   for_each_property_of_node(dn, pp) {
+   /* For each property add two bytes extra. One for string null
+* character for property name and other for FDT property start
+* tag FDT_PROP.
+*/
+   cpu_node_size = cpu_node_size + pp->length + strlen(pp->name) + 
2;
+   }
+
+out:
+   return cpu_node_size;
+}
+
+/**
+ * get_crash_fdt_mem_sz() - calculate mem size for crash kernel FDT
+ * @fdt: pointer to crash kernel FDT
+ *
+ * Calculate the buffer space needed to add more CPU nodes in the crash FDT
+ * after capture kernel load due to CPU hotplug events.
+ */
+static unsigned int get_crash_fdt_mem_sz(void *fdt)
+{
+   int fdt_cpu_nodes_sz, 

[RFC v4 PATCH 2/5] powerpc/crash hp: introduce a new config option CRASH_HOTPLUG

2022-04-11 Thread Sourabh Jain
The option CRASH_HOTPLUG enables in-kernel updates to kexec segments on
hotplug events.

All the updates needed on the capture kernel load path in the kernel for
both the kexec_load and kexec_file_load system calls will be kept under this
config.

Signed-off-by: Sourabh Jain 
Reviewed-by: Eric DeVolder 
---
 arch/powerpc/Kconfig | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b779603978e1..777db33f75b5 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -623,6 +623,17 @@ config FA_DUMP
  If unsure, say "y". Only special kernels like petitboot may
  need to say "N" here.
 
+config CRASH_HOTPLUG
+   bool "kernel updates of crash kexec segments"
+   depends on CRASH_DUMP && (HOTPLUG_CPU) && KEXEC_FILE
+   help
+ An efficient way to keep the capture kernel up-to-date with CPU
+ hotplug events. On a CPU hotplug event the kexec segments of the
+ capture kernel become stale and need to be updated with the latest
+ CPU data. In this method the kernel performs a minimal update to
+ only the relevant kexec segments on a CPU hotplug event, instead of
+ triggering a full capture kernel reload from userspace using a udev
+ rule.
+
 config PRESERVE_FA_DUMP
bool "Preserve Firmware-assisted dump"
depends on PPC64 && PPC_POWERNV && !FA_DUMP
-- 
2.35.1



[RFC v4 PATCH 3/5] powerpc/crash hp: update kimage_arch struct

2022-04-11 Thread Sourabh Jain
Two new members, fdt_index and fdt_index_valid, are added to the kimage_arch
struct to track the FDT kexec segment. These new members help the crash
hotplug handler access the FDT segment in the kexec segment array directly;
otherwise, we would have to loop through all kexec segments to find the FDT
segment.
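
For illustration, with these members in place the handler can resolve the FDT
segment directly (a sketch only; the actual use is in a later patch in this
series):

	if (image->arch.fdt_index_valid)
		fdt = __va(image->segment[image->arch.fdt_index].mem);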

Signed-off-by: Sourabh Jain 
---
 arch/powerpc/include/asm/kexec.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index e1288826e22e..19c2cab6a880 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -104,6 +104,8 @@ extern const struct kexec_file_ops kexec_elf64_ops;
 struct kimage_arch {
struct crash_mem *exclude_ranges;
 
+   int fdt_index;
+   bool fdt_index_valid;
unsigned long backup_start;
void *backup_buf;
void *fdt;
-- 
2.35.1



[PATCH v3 08/12] serial: General support for multipoint addresses

2022-04-11 Thread Ilpo Järvinen
Add generic support for serial multipoint addressing. Two new
ioctls are added. TIOCSADDR is used to indicate the
destination/receive address. TIOCGADDR returns the current
address in use. The driver should implement set_addr and get_addr
to support the addressing mode.

Adjust ADDRB clearing to happen only if the driver does not provide
set_addr (i.e. the driver doesn't support the addressing mode).

This change is necessary for supporting devices with RS485
multipoint addressing [*]. A following patch in the patch series
adds support for the Synopsys DesignWare UART, which is capable of
9th bit addressing mode. In this mode, the 9th bit is used to
indicate an address (byte) within the communication line. The 9th
bit addressing mode is selected using ADDRB, introduced by the
previous patch.

The transmit address / receive filter are specified by setting
the flags SER_ADDR_DEST and/or SER_ADDR_RECV. When the user
supplies the transmit address, in the 9-bit addressing mode it is
sent out immediately with the 9th bit set to 1. After that, the
subsequent normal data bytes are sent with the 9th bit as 0 and
are intended for the device with the given address. It is up to
the receiver to enforce the filter using SER_ADDR_RECV. When
userspace has supplied the receive address, the driver is expected
to handle the matching of the address, and only data with that
address is forwarded to the user. Both SER_ADDR_DEST and
SER_ADDR_RECV can be given at the same time in a single call if
the addresses are the same.

The user can clear the receive filter with SER_ADDR_RECV_CLEAR.
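
A minimal userspace sketch of the ioctls described above (not part of this
patch; the "addr" member name is an assumption made only for illustration,
the real struct serial_addr layout is the one added to
include/uapi/linux/serial.h):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/serial.h>

	int main(void)
	{
		struct serial_addr sa = { 0 };
		int fd = open("/dev/ttyS1", O_RDWR | O_NOCTTY);

		if (fd < 0)
			return 1;

		/* Send to device 0x05 and accept only frames addressed to
		 * 0x05; both flags may be set in one call when the
		 * addresses match.
		 */
		sa.addr = 0x05;		/* assumed member name */
		sa.flags = SER_ADDR_DEST | SER_ADDR_RECV;
		if (ioctl(fd, TIOCSADDR, &sa))
			perror("TIOCSADDR");

		/* Read back the address currently in use. */
		if (ioctl(fd, TIOCGADDR, &sa) == 0)
			printf("addr 0x%x flags 0x%x\n", sa.addr, sa.flags);

		/* Clear the receive filter again. */
		sa.flags = SER_ADDR_RECV_CLEAR;
		ioctl(fd, TIOCSADDR, &sa);
		return 0;
	}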

[*] Technically, RS485 is just an electrical spec and does not
itself specify the 9th bit addressing mode, but the 9th bit seems
to be at least a "semi-standard" way to do addressing with RS485.

Cc: linux-...@vger.kernel.org
Cc: Ivan Kokshaysky 
Cc: Matt Turner 
Cc: linux-al...@vger.kernel.org
Cc: Thomas Bogendoerfer 
Cc: linux-m...@vger.kernel.org
Cc: "James E.J. Bottomley" 
Cc: Helge Deller 
Cc: linux-par...@vger.kernel.org
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: linux...@vger.kernel.org
Cc: "David S. Miller" 
Cc: sparcli...@vger.kernel.org
Cc: Chris Zankel 
Cc: Max Filippov 
Cc: linux-xte...@linux-xtensa.org
Cc: Arnd Bergmann 
Cc: linux-a...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Signed-off-by: Ilpo Järvinen 
---
 .../driver-api/serial/serial-rs485.rst| 23 ++-
 arch/alpha/include/uapi/asm/ioctls.h  |  3 +
 arch/mips/include/uapi/asm/ioctls.h   |  3 +
 arch/parisc/include/uapi/asm/ioctls.h |  3 +
 arch/powerpc/include/uapi/asm/ioctls.h|  3 +
 arch/sh/include/uapi/asm/ioctls.h |  3 +
 arch/sparc/include/uapi/asm/ioctls.h  |  3 +
 arch/xtensa/include/uapi/asm/ioctls.h |  3 +
 drivers/tty/serial/8250/8250_core.c   |  2 +
 drivers/tty/serial/serial_core.c  | 62 ++-
 drivers/tty/tty_io.c  |  2 +
 include/linux/serial_core.h   |  6 ++
 include/uapi/asm-generic/ioctls.h |  3 +
 include/uapi/linux/serial.h   |  8 +++
 14 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/Documentation/driver-api/serial/serial-rs485.rst 
b/Documentation/driver-api/serial/serial-rs485.rst
index 6bc824f948f9..2f45f007fa5b 100644
--- a/Documentation/driver-api/serial/serial-rs485.rst
+++ b/Documentation/driver-api/serial/serial-rs485.rst
@@ -95,7 +95,28 @@ RS485 Serial Communications
/* Error handling. See errno. */
}
 
-5. References
+5. Multipoint Addressing
+
+
+   The Linux kernel provides the serial_addr structure to handle addressing
+   within a multipoint serial communications line such as RS485. 9th bit
+   addressing mode is enabled by adding the ADDRB flag in termios c_cflag.
+
+   Serial core calls device specific set/get_addr in response to TIOCSADDR and
+   TIOCGADDR ioctls with a pointer to serial_addr. Destination and receive
+   address can be specified using serial_addr flags field. Receive address may
+   also be cleared using flags. Once an address is set, the communication
+   can occur only with the particular device and other peers are filtered out.
+   It is left up to the receiver side to enforce the filtering.
+
+   Address flags:
+   - SER_ADDR_RECV: Receive (filter) address.
+   - SER_ADDR_RECV_CLEAR: Clear receive filter (only for TIOCSADDR).
+   - SER_ADDR_DEST: Destination address.
+
+   Note: not all devices supporting RS485 support multipoint addressing.
+
+6. References
 =
 
  [1]   include/uapi/linux/serial.h
diff --git a/arch/alpha/include/uapi/asm/ioctls.h 
b/arch/alpha/include/uapi/asm/ioctls.h
index 971311605288..500cab3e1d6b 100644
--- a/arch/alpha/include/uapi/asm/ioctls.h
+++ b/arch/alpha/include/uapi/asm/ioctls.h
@@ -125,4 +125,7 @@
 #define TIOCMIWAIT 0x545C  /* wait for a change on serial input line(s) */
 #define 

[PATCH v3 07/12] serial: termbits: ADDRB to indicate 9th bit addressing mode

2022-04-11 Thread Ilpo Järvinen
Add ADDRB to termbits to indicate 9th bit addressing mode.
This change is necessary for supporting devices with RS485
multipoint addressing [*]. A later patch in the patch series
adds support for the Synopsys DesignWare UART, which is capable of
9th bit addressing mode. In this mode, the 9th bit is used to
indicate an address (byte) within the communication line. The 9th
bit addressing mode is selected using ADDRB, introduced by an
earlier patch.

[*] Technically, RS485 is just an electrical spec and does not
itself specify the 9th bit addressing mode, but the 9th bit seems
to be at least a "semi-standard" way to do addressing with RS485.
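
As a rough illustration (not part of this patch), userspace would opt in to
the mode via termios once the patched uapi termbits headers are in use, so
that ADDRB is visible:

	#include <termios.h>

	/* Minimal sketch: enable 9th bit addressing on an already-open tty
	 * fd. The serial core clears ADDRB again if the device cannot
	 * support it.
	 */
	static int enable_addrb(int fd)
	{
		struct termios t;

		if (tcgetattr(fd, &t))
			return -1;
		t.c_cflag |= ADDRB;
		return tcsetattr(fd, TCSANOW, &t);
	}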

Cc: linux-...@vger.kernel.org
Cc: Ivan Kokshaysky 
Cc: Matt Turner 
Cc: linux-al...@vger.kernel.org
Cc: Thomas Bogendoerfer 
Cc: linux-m...@vger.kernel.org
Cc: "James E.J. Bottomley" 
Cc: Helge Deller 
Cc: linux-par...@vger.kernel.org
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: "David S. Miller" 
Cc: sparcli...@vger.kernel.org
Cc: Arnd Bergmann 
Cc: linux-a...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Signed-off-by: Ilpo Järvinen 
---
 arch/alpha/include/uapi/asm/termbits.h   | 1 +
 arch/mips/include/uapi/asm/termbits.h| 1 +
 arch/parisc/include/uapi/asm/termbits.h  | 1 +
 arch/powerpc/include/uapi/asm/termbits.h | 1 +
 arch/sparc/include/uapi/asm/termbits.h   | 1 +
 drivers/char/pcmcia/synclink_cs.c| 2 ++
 drivers/ipack/devices/ipoctal.c  | 2 ++
 drivers/mmc/core/sdio_uart.c | 2 ++
 drivers/net/usb/hso.c| 3 ++-
 drivers/s390/char/tty3270.c  | 3 +++
 drivers/staging/greybus/uart.c   | 2 ++
 drivers/tty/amiserial.c  | 6 +-
 drivers/tty/moxa.c   | 1 +
 drivers/tty/mxser.c  | 1 +
 drivers/tty/serial/serial_core.c | 2 ++
 drivers/tty/synclink_gt.c| 2 ++
 drivers/tty/tty_ioctl.c  | 2 ++
 drivers/usb/class/cdc-acm.c  | 2 ++
 drivers/usb/serial/usb-serial.c  | 6 --
 include/uapi/asm-generic/termbits.h  | 1 +
 net/bluetooth/rfcomm/tty.c   | 2 ++
 21 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/arch/alpha/include/uapi/asm/termbits.h 
b/arch/alpha/include/uapi/asm/termbits.h
index 4575ba34a0ea..0c123e715486 100644
--- a/arch/alpha/include/uapi/asm/termbits.h
+++ b/arch/alpha/include/uapi/asm/termbits.h
@@ -180,6 +180,7 @@ struct ktermios {
 #define HUPCL  0004
 
 #define CLOCAL 0010
+#define ADDRB  0040/* address bit */
 #define CMSPAR   0100  /* mark or space (stick) parity */
 #define CRTSCTS  0200  /* flow control */
 
diff --git a/arch/mips/include/uapi/asm/termbits.h 
b/arch/mips/include/uapi/asm/termbits.h
index dfeffba729b7..4732d31b0e4e 100644
--- a/arch/mips/include/uapi/asm/termbits.h
+++ b/arch/mips/include/uapi/asm/termbits.h
@@ -182,6 +182,7 @@ struct ktermios {
 #define B350 0010016
 #define B400 0010017
 #define CIBAUD   00200360  /* input baud rate */
+#define ADDRB0040  /* address bit */
 #define CMSPAR   0100  /* mark or space (stick) parity */
 #define CRTSCTS  0200  /* flow control */
 
diff --git a/arch/parisc/include/uapi/asm/termbits.h 
b/arch/parisc/include/uapi/asm/termbits.h
index 40e920f8d683..d6bbd10d92ba 100644
--- a/arch/parisc/include/uapi/asm/termbits.h
+++ b/arch/parisc/include/uapi/asm/termbits.h
@@ -159,6 +159,7 @@ struct ktermios {
 #define  B350 0010016
 #define  B400 0010017
 #define CIBAUD00200360 /* input baud rate */
+#define ADDRB0040  /* address bit */
 #define CMSPAR0100  /* mark or space (stick) parity */
 #define CRTSCTS   0200  /* flow control */
 
diff --git a/arch/powerpc/include/uapi/asm/termbits.h 
b/arch/powerpc/include/uapi/asm/termbits.h
index ed18bc61f63d..c6a033732f39 100644
--- a/arch/powerpc/include/uapi/asm/termbits.h
+++ b/arch/powerpc/include/uapi/asm/termbits.h
@@ -171,6 +171,7 @@ struct ktermios {
 #define HUPCL  0004
 
 #define CLOCAL 0010
+#define ADDRB  0040/* address bit */
 #define CMSPAR   0100  /* mark or space (stick) parity */
 #define CRTSCTS  0200  /* flow control */
 
diff --git a/arch/sparc/include/uapi/asm/termbits.h 
b/arch/sparc/include/uapi/asm/termbits.h
index ce5ad5d0f105..5eb1d547b5c4 100644
--- a/arch/sparc/include/uapi/asm/termbits.h
+++ b/arch/sparc/include/uapi/asm/termbits.h
@@ -201,6 +201,7 @@ struct ktermios {
 #define B350  0x1012
 #define B400  0x1013  */
 #define CIBAUD   0x100f  /* input baud rate (not used) */
+#define ADDRB0x2000  /* address bit */
 #define CMSPAR   0x4000  /* mark or space (stick) parity */
 #define CRTSCTS  0x8000  /* flow control */
 
diff --git a/drivers/char/pcmcia/synclink_cs.c 

Re: [PATCH] powerpc/numa: Handle partially initialized numa nodes

2022-04-11 Thread Geetika Moolchandani1



From: Srikar Dronamraju 
Sent: Friday, April 8, 2022 5:55 PM
To: Oscar Salvador 
Cc: Michael Ellerman ; linuxppc-dev 
; linux...@kvack.org ; 
Michal Hocko ; Geetika Moolchandani1 

Subject: Re: [PATCH] powerpc/numa: Handle partially initialized numa nodes

* Oscar Salvador  [2022-04-06 18:19:00]:

> On Wed, Mar 30, 2022 at 07:21:23PM +0530, Srikar Dronamraju wrote:
> >  arch/powerpc/mm/numa.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> > index b9b7fefbb64b..13022d734951 100644
> > --- a/arch/powerpc/mm/numa.c
> > +++ b/arch/powerpc/mm/numa.c
> > @@ -1436,7 +1436,7 @@ int find_and_online_cpu_nid(int cpu)
> >  if (new_nid < 0 || !node_possible(new_nid))
> >  new_nid = first_online_node;
> >
> > -   if (NODE_DATA(new_nid) == NULL) {
> > +   if (!node_online(new_nid)) {
> >  #ifdef CONFIG_MEMORY_HOTPLUG
> >  /*
> >   * Need to ensure that NODE_DATA is initialized for a node from
>
> Because of this fix, I wanted to check whether we might have any more 
> fallouts due
> to ("mm: handle uninitialized numa nodes gracefully"), and it made me look 
> closer
> as to why powerpc is the only platform that special cases try_online_node(),
> while all others rely on cpu_up()->try_online_node() to do the right thing.
>
> So, I had a look.
> Let us rewind a bit:
>
> The commit that sets find_and_online_cpu_nid() in dlpar_online_cpu was
> e67e02a544e9 ("powerpc/pseries: Fix cpu hotplug crash with memoryless nodes").
>
> In there, it says that we have the following picture:
>
> partition_sched_domains
>  arch_update_cpu_topology
>   numa_update_cpu_topology
>find_and_online_cpu_nid
>
> and by the time find_and_online_cpu_nid() gets called to online the node, it 
> might be
> too late as we might have referenced some NODE_DATA() fields.
> Note that this happens at a much later stage in cpuhp.
>
> Also note that at a much earlier stage, we do already have a 
> try_online_node() in cpu_up(),
> which should allocate-and-online the node and prevent accessing garbage.
> But the problem is that, on powerpc, all possible cpus have the same node set 
> at boot stage
> (see arch/powerpc/mm/numa.c:mem_topology_setup),
> so cpu_to_node() returns the same thing until it the mapping gets update 
> (which happens in
> start_secondary()->set_numa_node()), and so, the try_online_node() from 
> cpu_up() does not
> apply on the right node, because it still holds the not-up-to-date mapping 
> node <-> cpu.
>
> (e.g: in my test case, when onlining a CPU belongin to node1, 
> cpu_up()->try_online_node()
>  tries to online node0, or whatever old mapping numa<->cpu is there).
>
> So, we have something like:
>
> dlpar_online_cpu
>  device_online
>   dev->bus->online
>cpu_subsys_online
> cpu_device_up
>  cpu_up
>   try_online_node (old mapping nid <-> cpu )
>   ...
>   ...
>   cphp_callbacks
>sched_cpu_activate
> cpuset_update_active_cpus
>  schedule_work(_hotplug_work)
>   cpuset_hotplug_work
>partition_sched_domains
> arch_update_cpu_topology
>  numa_update_cpu_topology
>   find_and_online_cpu_nid (online new_nid)
>
>
> So, yeah, the real onlining in 
> numa_update_cpu_topology()->find_and_online_cpu_nid()
> happens too late, that is why adding find_and_online_cpu_nid() back in 
> dlpar_online_cpu()
> fixed the issue, but we should not need this special casing at all.
>
> We do already know the numa<->cpu associativity in
> dlpar_online_cpu()->find_and_online_cpu_nid(), so we should just be able to
> update the numa<->cpu mapping, and let the try_online_node() do the right 
> thing.
>
> With this in mind, I came up with the following patch, where I carried out a 
> battery
> of tests for CPU hotplug stuff and it worked as expected.
> But I am not familiar with all powerpc pitfalls, e.g: dedicated vs shared 
> cpus etc, so
> I would like to hear from more familiar people.
>
> The patch is:
>
> From: Oscar Salvador 
> Date: Wed, 6 Apr 2022 14:39:15 +0200
> Subject: [PATCH] powerpc/numa: Associate numa node to its cpu earlier
>
> powerpc is the only platform that does not rely on
> cpu_up()->try_online_node() to bring up a numa node,
> and special cases it, instead, deep in its own machinery:
>
> dlpar_online_cpu
>  find_and_online_cpu_nid
>   try_online_node
>
> This should not be needed, but the thing is that the try_online_node()
> from cpu_up() will not apply on the right node, because cpu_to_node()
> will return the old mapping numa<->cpu that gets set on boot stage
> for all possible cpus.
>
> That can be seen easily if we try to print out the numa node passed
> to try_online_node() in cpu_up().
>
> The thing is that the numa<->cpu mapping does not get updated till a much
> later stage in start_secondary:
>
> start_secondary:
>  set_numa_node(numa_cpu_lookup_table[cpu])
>
> But 

Re: [PATCH v2] macintosh: via-pmu and via-cuda need RTC_LIB

2022-04-11 Thread Arnd Bergmann
On Sun, Apr 10, 2022 at 6:10 PM Randy Dunlap  wrote:
>
> Fix build when RTC_LIB is not set/enabled.
> Eliminates these build errors:
>
> m68k-linux-ld: drivers/macintosh/via-pmu.o: in function `pmu_set_rtc_time':
> drivers/macintosh/via-pmu.c:1769: undefined reference to `rtc_tm_to_time64'
> m68k-linux-ld: drivers/macintosh/via-cuda.o: in function `cuda_set_rtc_time':
> drivers/macintosh/via-cuda.c:797: undefined reference to `rtc_tm_to_time64'
>
> Fixes: 0792a2c8e0bb ("macintosh: Use common code to access RTC")
> Signed-off-by: Randy Dunlap 
> Reported-by: kernel test robot 
> Suggested-by: Christophe Leroy 
> Cc: Benjamin Herrenschmidt 
> Cc: Michael Ellerman 
> Cc: Kees Cook 
> Cc: Arnd Bergmann 
> Cc: Finn Thain 
> Cc: Geert Uytterhoeven 
> Cc: Nathan Chancellor 
> Cc: Nick Desaulniers 
> Cc: linuxppc-dev@lists.ozlabs.org
> ---
> v2: use RTC_LIB instead of open-coding the call to rtc_tm_to_time64()

This sounds like a step in the right direction. Note that there is
another open-coded rtc_tm_to_time64() in read_persistent_clock64();
it might be worth changing that one as well and just always including
RTC_LIB, though that's a separate issue.
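
For reference, a minimal sketch (illustrative only, not taken from the driver)
of what the RTC_LIB helper replaces:

	#include <linux/rtc.h>

	static time64_t example_read_time(struct rtc_time *tm)
	{
		/* Open-coded variants typically look like:
		 *   mktime64(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
		 *            tm->tm_hour, tm->tm_min, tm->tm_sec);
		 * rtc_tm_to_time64() applies the same offsets itself.
		 */
		return rtc_tm_to_time64(tm);
	}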

Acked-by: Arnd Bergmann 

> --- a/drivers/macintosh/Kconfig
> +++ b/drivers/macintosh/Kconfig
> @@ -44,6 +44,7 @@ config ADB_IOP
>  config ADB_CUDA
> bool "Support for Cuda/Egret based Macs and PowerMacs"
> depends on (ADB || PPC_PMAC) && !PPC_PMAC64
> +   select RTC_LIB
> help
>   This provides support for Cuda/Egret based Macintosh and
>   Power Macintosh systems. This includes most m68k based Macs,
> @@ -57,6 +58,7 @@ config ADB_CUDA
>  config ADB_PMU
> bool "Support for PMU based PowerMacs and PowerBooks"
> depends on PPC_PMAC || MAC
> +   select RTC_LIB
> help
>   On PowerBooks, iBooks, and recent iMacs and Power Macintoshes, the
>   PMU is an embedded microprocessor whose primary function is to

I think ideally these would just become regular RTC drivers, rather than
relying on the roundtrip through CONFIG_RTC_DRV_GENERIC.

Arnd


Re: [PATCH] powerpc/numa: Handle partially initialized numa nodes

2022-04-11 Thread Oscar Salvador
On Sun, Apr 10, 2022 at 09:28:38PM +1000, Michael Ellerman wrote:
> Yeah agreed, thanks for getting to the root of the problem.
> 
> Can you resend as a standalone patch. Because you sent it as a reply it
> won't be recognised by patchwork[1] which means it risks getting lost.

Hi Michael,

It's done [1].

thanks!

[1] 
http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20220411074934.4632-1-osalva...@suse.de/

 
-- 
Oscar Salvador
SUSE Labs


[PATCH] powerpc/numa: Associate numa node to its cpu earlier

2022-04-11 Thread Oscar Salvador
powerpc is the only platform that does not rely on
cpu_up()->try_online_node() to bring up a numa node,
and instead special-cases it deep in its own machinery:

dlpar_online_cpu
 find_and_online_cpu_nid
  try_online_node

This should not be needed, but the thing is that the try_online_node()
from cpu_up() will not apply on the right node, because cpu_to_node()
will return the old mapping numa<->cpu that gets set on boot stage
for all possible cpus.

That can be seen easily if we try to print out the numa node passed
to try_online_node() in cpu_up().

The thing is that the numa<->cpu mapping does not get updated till a much
later stage in start_secondary:

start_secondary:
 set_numa_node(numa_cpu_lookup_table[cpu])

But we do not really care, as we already know the
CPU <-> NUMA associativity back in find_and_online_cpu_nid(),
so let us make use of that and set the proper numa<->cpu mapping,
so cpu_to_node() in cpu_up() returns the right node and
try_online_node() can do its work.

Signed-off-by: Oscar Salvador 
Reviewed-by: Srikar Dronamraju 
Tested-by: Geetika Moolchandani 
---
 arch/powerpc/include/asm/topology.h  |  8 ++-
 arch/powerpc/mm/numa.c   | 31 +++-
 arch/powerpc/platforms/pseries/hotplug-cpu.c |  2 +-
 3 files changed, 11 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/topology.h 
b/arch/powerpc/include/asm/topology.h
index 36fcafb1fd6d..6ae1b2dce83e 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -111,14 +111,10 @@ static inline void unmap_cpu_from_node(unsigned long cpu) 
{}
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
-extern int find_and_online_cpu_nid(int cpu);
+extern void find_and_update_cpu_nid(int cpu);
 extern int cpu_to_coregroup_id(int cpu);
 #else
-static inline int find_and_online_cpu_nid(int cpu)
-{
-   return 0;
-}
-
+static inline void find_and_update_cpu_nid(int cpu) {}
 static inline int cpu_to_coregroup_id(int cpu)
 {
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b9b7fefbb64b..b5bc8b1a833d 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1423,43 +1423,28 @@ static long vphn_get_associativity(unsigned long cpu,
return rc;
 }
 
-int find_and_online_cpu_nid(int cpu)
+void find_and_update_cpu_nid(int cpu)
 {
__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
int new_nid;
 
/* Use associativity from first thread for all siblings */
if (vphn_get_associativity(cpu, associativity))
-   return cpu_to_node(cpu);
+   return;
 
+   /* Do not have previous associativity, so find it now. */
new_nid = associativity_to_nid(associativity);
+
if (new_nid < 0 || !node_possible(new_nid))
new_nid = first_online_node;
-
-   if (NODE_DATA(new_nid) == NULL) {
-#ifdef CONFIG_MEMORY_HOTPLUG
-   /*
-* Need to ensure that NODE_DATA is initialized for a node from
-* available memory (see memblock_alloc_try_nid). If unable to
-* init the node, then default to nearest node that has memory
-* installed. Skip onlining a node if the subsystems are not
-* yet initialized.
-*/
-   if (!topology_inited || try_online_node(new_nid))
-   new_nid = first_online_node;
-#else
-   /*
-* Default to using the nearest node that has memory installed.
-* Otherwise, it would be necessary to patch the kernel MM code
-* to deal with more memoryless-node error conditions.
+   else
+   /* Associate node <-> cpu, so cpu_up() calls
+* try_online_node() on the right node.
 */
-   new_nid = first_online_node;
-#endif
-   }
+   set_cpu_numa_node(cpu, new_nid);
 
pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__,
cpu, new_nid);
-   return new_nid;
 }
 
 int cpu_to_coregroup_id(int cpu)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index b81fc846d99c..0f8cd8b06432 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -398,7 +398,7 @@ static int dlpar_online_cpu(struct device_node *dn)
if (get_hard_smp_processor_id(cpu) != thread)
continue;
cpu_maps_update_done();
-   find_and_online_cpu_nid(cpu);
+   find_and_update_cpu_nid(cpu);
rc = device_online(get_cpu_device(cpu));
if (rc) {
dlpar_offline_cpu(dn);
-- 
2.16.4