[PATCH] ibmvfc: add new fields for version 2 of several MADs

2020-10-25 Thread Tyrel Datwyler
Introduce a targetWWPN field to several MADs. It is possible that the
SCSI ID of a target can change due to fabric changes. The WWPN of the
SCSI target provides a more stable way to identify the target. Also, add
flags for receiving MAD versioning information and for advertising
client support for targetWWPN with the VIOS. The latter capability flag
will be required for future clients capable of requesting multiple
hardware queues from the host adapter.
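
For illustration, a client advertising these capabilities at NPIV login
time might look roughly like this (a sketch only, not part of this
patch; login_info is assumed to be the driver's struct ibmvfc_npiv_login
buffer set up in ibmvfc.c):

	/* sketch: advertise MAD versioning and targetWWPN support to the
	 * VIOS via the new capability bits defined below */
	login_info->capabilities = cpu_to_be64(IBMVFC_CAN_MIGRATE |
					       IBMVFC_CAN_USE_MAD_VERSION |
					       IBMVFC_CAN_SEND_VF_WWPN);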

Signed-off-by: Tyrel Datwyler 
---
 drivers/scsi/ibmvscsi/ibmvfc.h | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 34debccfb142..c9c7f55baf55 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -54,6 +54,7 @@
 
 #define IBMVFC_MAD_SUCCESS 0x00
 #define IBMVFC_MAD_NOT_SUPPORTED   0xF1
+#define IBMVFC_MAD_VERSION_NOT_SUPP0xF2
 #define IBMVFC_MAD_FAILED  0xF7
 #define IBMVFC_MAD_DRIVER_FAILED   0xEE
 #define IBMVFC_MAD_CRQ_ERROR   0xEF
@@ -168,6 +169,8 @@ struct ibmvfc_npiv_login {
 #define IBMVFC_CAN_MIGRATE 0x01
 #define IBMVFC_CAN_USE_CHANNELS0x02
 #define IBMVFC_CAN_HANDLE_FPIN 0x04
+#define IBMVFC_CAN_USE_MAD_VERSION 0x08
+#define IBMVFC_CAN_SEND_VF_WWPN0x10
__be64 node_name;
struct srp_direct_buf async;
u8 partition_name[IBMVFC_MAX_NAME];
@@ -211,7 +214,9 @@ struct ibmvfc_npiv_login_resp {
__be64 capabilities;
 #define IBMVFC_CAN_FLUSH_ON_HALT   0x08
 #define IBMVFC_CAN_SUPPRESS_ABTS   0x10
-#define IBMVFC_CAN_SUPPORT_CHANNELS0x20
+#define IBMVFC_MAD_VERSION_CAP 0x20
+#define IBMVFC_HANDLE_VF_WWPN  0x40
+#define IBMVFC_CAN_SUPPORT_CHANNELS0x80
__be32 max_cmds;
__be32 scsi_id_sz;
__be64 max_dma_len;
@@ -293,7 +298,8 @@ struct ibmvfc_port_login {
__be32 reserved2;
struct ibmvfc_service_parms service_parms;
struct ibmvfc_service_parms service_parms_change;
-   __be64 reserved3[2];
+   __be64 targetWWPN;
+   __be64 reserved3;
 } __packed __aligned(8);
 
 struct ibmvfc_move_login {
@@ -344,7 +350,8 @@ struct ibmvfc_process_login {
__be16 status;
__be16 error;   /* also fc_reason */
__be32 reserved2;
-   __be64 reserved3[2];
+   __be64 targetWWPN;
+   __be64 reserved3;
 } __packed __aligned(8);
 
 struct ibmvfc_query_tgt {
@@ -378,7 +385,8 @@ struct ibmvfc_tmf {
__be32 cancel_key;
__be32 my_cancel_key;
__be32 pad;
-   __be64 reserved[2];
+   __be64 targetWWPN;
+   __be64 taskTag;
 } __packed __aligned(8);
 
 enum ibmvfc_fcp_rsp_info_codes {
@@ -474,7 +482,8 @@ struct ibmvfc_cmd {
__be64 correlation;
__be64 tgt_scsi_id;
__be64 tag;
-   __be64 reserved3[2];
+   __be64 targetWWPN;
+   __be64 reserved3;
struct ibmvfc_fcp_cmd_iu iu;
struct ibmvfc_fcp_rsp rsp;
 } __packed __aligned(8);
@@ -503,7 +512,8 @@ struct ibmvfc_passthru_iu {
__be64 correlation;
__be64 scsi_id;
__be64 tag;
-   __be64 reserved2[2];
+   __be64 targetWWPN;
+   __be64 reserved2;
 } __packed __aligned(8);
 
 struct ibmvfc_passthru_mad {
-- 
2.27.0



Re: [PATCH 0/4] arch, mm: improve robustness of direct map manipulation

2020-10-25 Thread Edgecombe, Rick P
On Sun, 2020-10-25 at 12:15 +0200, Mike Rapoport wrote:
> Indeed, for architectures that define CONFIG_ARCH_HAS_SET_DIRECT_MAP it
> is possible that __kernel_map_pages() would fail, but since this
> function is void, the failure will go unnoticed.

Could you elaborate on how this could happen? Do you mean during
runtime today or if something new was introduced?



Re: [PATCH 4/4] arch, mm: make kernel_page_present() always available

2020-10-25 Thread Edgecombe, Rick P
On Sun, 2020-10-25 at 12:15 +0200, Mike Rapoport wrote:
> index 7f248fc45317..16f878c26667 100644
> --- a/arch/x86/mm/pat/set_memory.c
> +++ b/arch/x86/mm/pat/set_memory.c
> @@ -2228,7 +2228,6 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
>  }
>  #endif /* CONFIG_DEBUG_PAGEALLOC */
>  
> -#ifdef CONFIG_HIBERNATION
>  bool kernel_page_present(struct page *page)
>  {
> unsigned int level;
> @@ -2240,7 +2239,6 @@ bool kernel_page_present(struct page *page)
> pte = lookup_address((unsigned long)page_address(page), &level);
> return (pte_val(*pte) & _PAGE_PRESENT);
>  }
> -#endif /* CONFIG_HIBERNATION */

This is only used by hibernate today, right? It makes sense that it
should return a correct answer if someone starts to use it without
looking too closely at the header. But could we just remove the default
static inline return-true implementation and let the linker fail if
someone starts to use it outside hibernate? Then we could leave it
compiled out until then.
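
In other words, roughly (an untested sketch of the idea): keep only the
declaration and drop the fallback stub, so a new caller outside
hibernate fails at link time instead of silently getting "true":

	/* no static inline fallback: an unexpected user of
	 * kernel_page_present() now fails to link rather than being
	 * quietly told the page is present */
	bool kernel_page_present(struct page *page);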

Also, it looks like riscv does not have ARCH_HIBERNATION_POSSIBLE, so
the new function added here can't be used yet. You could also just let
the linker catch it if riscv ever enables hibernate?


Re: [PATCH 2/4] PM: hibernate: improve robustness of mapping pages in the direct map

2020-10-25 Thread Edgecombe, Rick P
On Sun, 2020-10-25 at 12:15 +0200, Mike Rapoport wrote:
> From: Mike Rapoport 
> 
> When DEBUG_PAGEALLOC or ARCH_HAS_SET_DIRECT_MAP is enabled, a page may
> not be present in the direct map and has to be explicitly mapped before
> it can be copied.
> 
> On arm64 it is possible that a page would be removed from the direct map
> using set_direct_map_invalid_noflush() but __kernel_map_pages() will
> refuse to map this page back if DEBUG_PAGEALLOC is disabled.

It looks to me like arm64's __kernel_map_pages() will still attempt to
map it if rodata_full is true; how does this happen?
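
For reference, this is the arm64 code in question as it stands before
the series (arch/arm64/mm/pageattr.c, per the hunk removed in patch 3):

	void __kernel_map_pages(struct page *page, int numpages, int enable)
	{
		if (!debug_pagealloc_enabled() && !rodata_full)
			return;

		set_memory_valid((unsigned long)page_address(page), numpages, enable);
	}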

> Explicitly use set_direct_map_{default,invalid}_noflush() for the
> ARCH_HAS_SET_DIRECT_MAP case and debug_pagealloc_map_pages() for the
> DEBUG_PAGEALLOC case.
> 
> While at it, rename kernel_map_pages() to hibernate_map_page() and
> drop the numpages parameter.
> 
> Signed-off-by: Mike Rapoport 
> ---
>  kernel/power/snapshot.c | 29 +++--
>  1 file changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
> index fa499466f645..ecb7b32ce77c 100644
> --- a/kernel/power/snapshot.c
> +++ b/kernel/power/snapshot.c
> @@ -76,16 +76,25 @@ static inline void hibernate_restore_protect_page(void *page_address) {}
>  static inline void hibernate_restore_unprotect_page(void *page_address) {}
>  #endif /* CONFIG_STRICT_KERNEL_RWX  && CONFIG_ARCH_HAS_SET_MEMORY */
>  
> -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
> -static inline void
> -kernel_map_pages(struct page *page, int numpages, int enable)
> +static inline void hibernate_map_page(struct page *page, int enable)
>  {
> - __kernel_map_pages(page, numpages, enable);
> + if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
> + unsigned long addr = (unsigned long)page_address(page);
> + int ret;
> +
> + if (enable)
> + ret = set_direct_map_default_noflush(page);
> + else
> + ret = set_direct_map_invalid_noflush(page);
> +
> + if (WARN_ON(ret))
> + return;
> +
> + flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
> + } else {
> + debug_pagealloc_map_pages(page, 1, enable);
> + }
>  }
> -#else
> -static inline void
> -kernel_map_pages(struct page *page, int numpages, int enable) {}
> -#endif
>  
>  static int swsusp_page_is_free(struct page *);
>  static void swsusp_set_page_forbidden(struct page *);
> @@ -1366,9 +1375,9 @@ static void safe_copy_page(void *dst, struct page *s_page)
>   if (kernel_page_present(s_page)) {
>   do_copy_page(dst, page_address(s_page));
>   } else {
> - kernel_map_pages(s_page, 1, 1);
> + hibernate_map_page(s_page, 1);
>   do_copy_page(dst, page_address(s_page));
> - kernel_map_pages(s_page, 1, 0);
> + hibernate_map_page(s_page, 0);
>   }
>  }
>  

If somehow a page was unmapped such that
set_direct_map_default_noflush() would fail, then this code introduces
a WARN, but it will still try to read the unmapped page. Why not put
the WARNs inside __kernel_map_pages() when the operations fail, and
add a warning for the debug_pagealloc cases as well? The logic around
both expects them not to fail.
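
On arm64, for example, that alternative might look roughly like this
(untested sketch, based on the post-series body of the function):

	/* warn at the point of failure in the arch helper, so every
	 * caller benefits, not just hibernate */
	void __kernel_map_pages(struct page *page, int numpages, int enable)
	{
		if (!rodata_full)
			return;

		WARN_ON(set_memory_valid((unsigned long)page_address(page),
					 numpages, enable));
	}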




Re: [PATCH 0/3] warn and suppress irqflood

2020-10-25 Thread Pingfan Liu
On Thu, Oct 22, 2020 at 4:37 PM Thomas Gleixner  wrote:
>
> On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote:
> > I hit an irq flood bug on a powerpc platform, and two years ago on an
> > x86 platform. When the bug happens, the kernel is totally occupied by
> > irqs. Currently, there may be nothing, or just a soft lockup warning,
> > shown on the console. It is better to warn users with irq flood info.
> >
> > In the kdump case, the kernel can move on by suppressing the irq flood.
>
> You're curing the symptom not the cause and the cure is just magic and
> can't work reliably.
Yeah, it is magic. But at least it is better to printk something and
alert users to what is happening. With the current code, it may show
nothing when the system hangs.
>
> Where is that irq flood originated from and why is none of the
> mechanisms we have in place to shut it up working?
The bug originates from the tpm_i2c_nuvoton driver, which calls the
i2c-bus driver (i2c-opal.c). The bug is triggered after
i2c_opal_send_request().

But things are complicated by the introduction of a firmware layer,
Skiboot, which hides the details of manipulating the hardware from
Linux.

I guess the software logic cannot enter a sane state when the kernel crashes.

Cc'ing the Skiboot and ppc64 communities to see whether anyone has an idea about it.

Thanks,
Pingfan


[PATCH 4/4] arch, mm: make kernel_page_present() always available

2020-10-25 Thread Mike Rapoport
From: Mike Rapoport 

For architectures that enable ARCH_HAS_SET_MEMORY, having the ability to
verify that a page is mapped in the kernel direct map can be useful
regardless of hibernation.

Add a RISC-V implementation of kernel_page_present() and update its
forward declarations and stubs to be part of the set_memory API.
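
A hypothetical caller (sketch only; buf is an assumed destination
buffer) could then consult the direct map before touching a page
through it:

	/* sketch: only read through the direct map when the page is
	 * actually mapped there */
	if (kernel_page_present(page))
		memcpy(buf, page_address(page), PAGE_SIZE);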

Signed-off-by: Mike Rapoport 
---
 arch/arm64/include/asm/cacheflush.h |  1 +
 arch/riscv/include/asm/set_memory.h |  1 +
 arch/riscv/mm/pageattr.c            | 29 +
 arch/x86/include/asm/set_memory.h   |  1 +
 arch/x86/mm/pat/set_memory.c        |  2 --
 include/linux/mm.h                  |  7 ---
 include/linux/set_memory.h          |  5 +
 7 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 9384fd8fc13c..45217f21f1fe 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -140,6 +140,7 @@ int set_memory_valid(unsigned long addr, int numpages, int enable);
 
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
+bool kernel_page_present(struct page *page);
 
 #include 
 
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 4c5bae7ca01c..d690b08dff2a 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -24,6 +24,7 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
+bool kernel_page_present(struct page *page);
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 321b09d2e2ea..87ba5a68bbb8 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -198,3 +198,32 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
 __pgprot(0), __pgprot(_PAGE_PRESENT));
 }
 #endif
+
+bool kernel_page_present(struct page *page)
+{
+   unsigned long addr = (unsigned long)page_address(page);
+   pgd_t *pgd;
+   pud_t *pud;
+   p4d_t *p4d;
+   pmd_t *pmd;
+   pte_t *pte;
+
+   pgd = pgd_offset_k(addr);
+   if (!pgd_present(*pgd))
+   return false;
+
+   p4d = p4d_offset(pgd, addr);
+   if (!p4d_present(*p4d))
+   return false;
+
+   pud = pud_offset(p4d, addr);
+   if (!pud_present(*pud))
+   return false;
+
+   pmd = pmd_offset(pud, addr);
+   if (!pmd_present(*pmd))
+   return false;
+
+   pte = pte_offset_kernel(pmd, addr);
+   return pte_present(*pte);
+}
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 5948218f35c5..4352f08bfbb5 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -82,6 +82,7 @@ int set_pages_rw(struct page *page, int numpages);
 
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
+bool kernel_page_present(struct page *page);
 
 extern int kernel_set_to_readonly;
 
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 7f248fc45317..16f878c26667 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2228,7 +2228,6 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
 }
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
-#ifdef CONFIG_HIBERNATION
 bool kernel_page_present(struct page *page)
 {
unsigned int level;
@@ -2240,7 +2239,6 @@ bool kernel_page_present(struct page *page)
	pte = lookup_address((unsigned long)page_address(page), &level);
return (pte_val(*pte) & _PAGE_PRESENT);
 }
-#endif /* CONFIG_HIBERNATION */
 
 int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
   unsigned numpages, unsigned long page_flags)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ab0ef6bd351d..44b82f22e76a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2937,16 +2937,9 @@ static inline void debug_pagealloc_map_pages(struct page *page,
if (debug_pagealloc_enabled_static())
__kernel_map_pages(page, numpages, enable);
 }
-
-#ifdef CONFIG_HIBERNATION
-extern bool kernel_page_present(struct page *page);
-#endif /* CONFIG_HIBERNATION */
 #else  /* CONFIG_DEBUG_PAGEALLOC */
 static inline void debug_pagealloc_map_pages(struct page *page,
 int numpages, int enable) {}
-#ifdef CONFIG_HIBERNATION
-static inline bool kernel_page_present(struct page *page) { return true; }
-#endif /* CONFIG_HIBERNATION */
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
 #ifdef __HAVE_ARCH_GATE_AREA
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index 860e0f843c12..fe1aa4e54680 100644
--- 

[PATCH 3/4] arch, mm: restore dependency of __kernel_map_pages() on DEBUG_PAGEALLOC

2020-10-25 Thread Mike Rapoport
From: Mike Rapoport 

The design of DEBUG_PAGEALLOC presumes that __kernel_map_pages() must never
fail. With this assumption it wouldn't be safe to allow general usage of
this function.

Moreover, some architectures that implement __kernel_map_pages() have this
function guarded by #ifdef DEBUG_PAGEALLOC and some refuse to map/unmap
pages when page allocation debugging is disabled at runtime.

As all the users of __kernel_map_pages() were converted to use
debug_pagealloc_map_pages(), it is safe to make __kernel_map_pages()
available only when DEBUG_PAGEALLOC is set.
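
The intended end state of the include/linux/mm.h guard is roughly the
following (a sketch reconstructed from patch 1 and this description):

	#ifdef CONFIG_DEBUG_PAGEALLOC
	extern void __kernel_map_pages(struct page *page, int numpages, int enable);

	static inline void debug_pagealloc_map_pages(struct page *page,
						     int numpages, int enable)
	{
		if (debug_pagealloc_enabled_static())
			__kernel_map_pages(page, numpages, enable);
	}
	#else	/* CONFIG_DEBUG_PAGEALLOC */
	static inline void debug_pagealloc_map_pages(struct page *page,
						     int numpages, int enable) {}
	#endif	/* CONFIG_DEBUG_PAGEALLOC */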

Signed-off-by: Mike Rapoport 
---
 arch/Kconfig |  3 +++
 arch/arm64/Kconfig   |  4 +---
 arch/arm64/mm/pageattr.c |  6 --
 arch/powerpc/Kconfig |  5 +
 arch/riscv/Kconfig   |  4 +---
 arch/riscv/include/asm/pgtable.h |  2 --
 arch/riscv/mm/pageattr.c |  2 ++
 arch/s390/Kconfig                |  4 +---
 arch/sparc/Kconfig   |  4 +---
 arch/x86/Kconfig |  4 +---
 arch/x86/mm/pat/set_memory.c |  2 ++
 include/linux/mm.h   | 10 +++---
 12 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 56b6ccc0e32d..56d4752b6db6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1028,6 +1028,9 @@ config HAVE_STATIC_CALL_INLINE
bool
depends on HAVE_STATIC_CALL
 
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+   bool
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 08fa3a1c50f0..1d4da0843668 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -71,6 +71,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_USE_SYM_ANNOTATIONS
+   select ARCH_SUPPORTS_DEBUG_PAGEALLOC
select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
@@ -1004,9 +1005,6 @@ config HOLES_IN_ZONE
 
 source "kernel/Kconfig.hz"
 
-config ARCH_SUPPORTS_DEBUG_PAGEALLOC
-   def_bool y
-
 config ARCH_SPARSEMEM_ENABLE
def_bool y
select SPARSEMEM_VMEMMAP_ENABLE
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 1b94f5b82654..18613d8834db 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -178,13 +178,15 @@ int set_direct_map_default_noflush(struct page *page)
   PAGE_SIZE, change_page_range, );
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
 void __kernel_map_pages(struct page *page, int numpages, int enable)
 {
-   if (!debug_pagealloc_enabled() && !rodata_full)
+   if (!rodata_full)
return;
 
set_memory_valid((unsigned long)page_address(page), numpages, enable);
 }
+#endif /* CONFIG_DEBUG_PAGEALLOC */
 
 /*
  * This function is used to determine if a linear map page has been marked as
@@ -204,7 +206,7 @@ bool kernel_page_present(struct page *page)
pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
 
-   if (!debug_pagealloc_enabled() && !rodata_full)
+   if (!rodata_full)
return true;
 
pgdp = pgd_offset_k(addr);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..ad8a83f3ddca 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -146,6 +146,7 @@ config PPC
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_SUPPORTS_ATOMIC_RMW
+   select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_QUEUED_RWLOCKS  if PPC_QUEUED_SPINLOCKS
@@ -355,10 +356,6 @@ config PPC_OF_PLATFORM_PCI
depends on PCI
depends on PPC64 # not supported on 32 bits yet
 
-config ARCH_SUPPORTS_DEBUG_PAGEALLOC
-   depends on PPC32 || PPC_BOOK3S_64
-   def_bool y
-
 config ARCH_SUPPORTS_UPROBES
def_bool y
 
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index d5e7ca08f22c..c704562ba45e 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -14,6 +14,7 @@ config RISCV
def_bool y
select ARCH_CLOCKSOURCE_INIT
select ARCH_SUPPORTS_ATOMIC_RMW
+   select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_VIRTUAL if MMU
@@ -153,9 +154,6 @@ config ARCH_SELECT_MEMORY_MODEL
 config ARCH_WANT_GENERAL_HUGETLB
def_bool y
 
-config ARCH_SUPPORTS_DEBUG_PAGEALLOC
-   def_bool y
-
 config SYS_SUPPORTS_HUGETLBFS
depends on MMU
def_bool y
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 183f1f4b2ae6..41a72861987c 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ 

[PATCH 2/4] PM: hibernate: improve robustness of mapping pages in the direct map

2020-10-25 Thread Mike Rapoport
From: Mike Rapoport 

When DEBUG_PAGEALLOC or ARCH_HAS_SET_DIRECT_MAP is enabled, a page may
not be present in the direct map and has to be explicitly mapped before
it can be copied.

On arm64 it is possible that a page would be removed from the direct map
using set_direct_map_invalid_noflush() but __kernel_map_pages() will refuse
to map this page back if DEBUG_PAGEALLOC is disabled.

Explicitly use set_direct_map_{default,invalid}_noflush() for the
ARCH_HAS_SET_DIRECT_MAP case and debug_pagealloc_map_pages() for the
DEBUG_PAGEALLOC case.

While at it, rename kernel_map_pages() to hibernate_map_page() and drop
the numpages parameter.

Signed-off-by: Mike Rapoport 
---
 kernel/power/snapshot.c | 29 +++--
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index fa499466f645..ecb7b32ce77c 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -76,16 +76,25 @@ static inline void hibernate_restore_protect_page(void *page_address) {}
 static inline void hibernate_restore_unprotect_page(void *page_address) {}
 #endif /* CONFIG_STRICT_KERNEL_RWX  && CONFIG_ARCH_HAS_SET_MEMORY */
 
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
-static inline void
-kernel_map_pages(struct page *page, int numpages, int enable)
+static inline void hibernate_map_page(struct page *page, int enable)
 {
-   __kernel_map_pages(page, numpages, enable);
+   if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
+   unsigned long addr = (unsigned long)page_address(page);
+   int ret;
+
+   if (enable)
+   ret = set_direct_map_default_noflush(page);
+   else
+   ret = set_direct_map_invalid_noflush(page);
+
+   if (WARN_ON(ret))
+   return;
+
+   flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+   } else {
+   debug_pagealloc_map_pages(page, 1, enable);
+   }
 }
-#else
-static inline void
-kernel_map_pages(struct page *page, int numpages, int enable) {}
-#endif
 
 static int swsusp_page_is_free(struct page *);
 static void swsusp_set_page_forbidden(struct page *);
@@ -1366,9 +1375,9 @@ static void safe_copy_page(void *dst, struct page *s_page)
if (kernel_page_present(s_page)) {
do_copy_page(dst, page_address(s_page));
} else {
-   kernel_map_pages(s_page, 1, 1);
+   hibernate_map_page(s_page, 1);
do_copy_page(dst, page_address(s_page));
-   kernel_map_pages(s_page, 1, 0);
+   hibernate_map_page(s_page, 0);
}
 }
 
-- 
2.28.0



[PATCH 1/4] mm: introduce debug_pagealloc_map_pages() helper

2020-10-25 Thread Mike Rapoport
From: Mike Rapoport 

When CONFIG_DEBUG_PAGEALLOC is enabled, the kernel unmaps pages from the
direct mapping after free_pages(). The pages then need to be mapped back
before they can be used. These mapping operations use
__kernel_map_pages() guarded with debug_pagealloc_enabled().

The only place that calls __kernel_map_pages() without checking whether
DEBUG_PAGEALLOC is enabled is the hibernation code that presumes
availability of this function when ARCH_HAS_SET_DIRECT_MAP is set.
Still, on arm64 __kernel_map_pages() will bail out when DEBUG_PAGEALLOC
is not enabled, while set_direct_map_invalid_noflush() may render some
pages not present in the direct map, so the hibernation code won't be
able to save such pages.

To make page allocation debugging and hibernation interaction more robust,
the dependency on DEBUG_PAGEALLOC or ARCH_HAS_SET_DIRECT_MAP has to be made
more explicit.

Start by combining the guard condition and the call to
__kernel_map_pages() into a single debug_pagealloc_map_pages() function
to emphasize that __kernel_map_pages() should not be called without
DEBUG_PAGEALLOC, and use this new function to map/unmap pages when page
allocation debugging is enabled.

As the only remaining user of kernel_map_pages() is the hibernation code,
move that function into kernel/power/snapshot.c, closer to its caller.

Signed-off-by: Mike Rapoport 
---
 include/linux/mm.h  | 16 +++-
 kernel/power/snapshot.c | 11 +++
 mm/memory_hotplug.c |  3 +--
 mm/page_alloc.c |  6 ++
 mm/slab.c   |  8 +++-
 5 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ef360fe70aaf..14e397f3752c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2927,21 +2927,19 @@ static inline bool debug_pagealloc_enabled_static(void)
 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
 extern void __kernel_map_pages(struct page *page, int numpages, int enable);
 
-/*
- * When called in DEBUG_PAGEALLOC context, the call should most likely be
- * guarded by debug_pagealloc_enabled() or debug_pagealloc_enabled_static()
- */
-static inline void
-kernel_map_pages(struct page *page, int numpages, int enable)
+static inline void debug_pagealloc_map_pages(struct page *page,
+int numpages, int enable)
 {
-   __kernel_map_pages(page, numpages, enable);
+   if (debug_pagealloc_enabled_static())
+   __kernel_map_pages(page, numpages, enable);
 }
+
 #ifdef CONFIG_HIBERNATION
 extern bool kernel_page_present(struct page *page);
 #endif /* CONFIG_HIBERNATION */
 #else  /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */
-static inline void
-kernel_map_pages(struct page *page, int numpages, int enable) {}
+static inline void debug_pagealloc_map_pages(struct page *page,
+int numpages, int enable) {}
 #ifdef CONFIG_HIBERNATION
 static inline bool kernel_page_present(struct page *page) { return true; }
 #endif /* CONFIG_HIBERNATION */
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 46b1804c1ddf..fa499466f645 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -76,6 +76,17 @@ static inline void hibernate_restore_protect_page(void *page_address) {}
 static inline void hibernate_restore_unprotect_page(void *page_address) {}
 #endif /* CONFIG_STRICT_KERNEL_RWX  && CONFIG_ARCH_HAS_SET_MEMORY */
 
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
+static inline void
+kernel_map_pages(struct page *page, int numpages, int enable)
+{
+   __kernel_map_pages(page, numpages, enable);
+}
+#else
+static inline void
+kernel_map_pages(struct page *page, int numpages, int enable) {}
+#endif
+
 static int swsusp_page_is_free(struct page *);
 static void swsusp_set_page_forbidden(struct page *);
 static void swsusp_unset_page_forbidden(struct page *);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b44d4c7ba73b..e2b6043a4428 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -614,8 +614,7 @@ void generic_online_page(struct page *page, unsigned int order)
 * so we should map it first. This is better than introducing a special
 * case in page freeing fast path.
 */
-   if (debug_pagealloc_enabled_static())
-   kernel_map_pages(page, 1 << order, 1);
+   debug_pagealloc_map_pages(page, 1 << order, 1);
__free_pages_core(page, order);
totalram_pages_add(1UL << order);
 #ifdef CONFIG_HIGHMEM
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 23f5066bd4a5..9a66a1ff9193 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1272,8 +1272,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 */
arch_free_page(page, order);
 
-   if (debug_pagealloc_enabled_static())
-   kernel_map_pages(page, 1 << order, 0);
+   

[PATCH 0/4] arch, mm: improve robustness of direct map manipulation

2020-10-25 Thread Mike Rapoport
From: Mike Rapoport 

Hi,

During recent discussion about KVM protected memory, David raised a concern
about usage of __kernel_map_pages() outside of DEBUG_PAGEALLOC scope [1].

Indeed, for architectures that define CONFIG_ARCH_HAS_SET_DIRECT_MAP it is
possible that __kernel_map_pages() would fail, but since this function is
void, the failure will go unnoticed.

Moreover, there's a lack of consistency in __kernel_map_pages()
semantics across architectures: some guard this function with
#ifdef DEBUG_PAGEALLOC, some refuse to update the direct map if page
allocation debugging is disabled at run time, and some allow modifying
the direct map regardless of DEBUG_PAGEALLOC settings.

This set straightens this out by restoring the dependency of
__kernel_map_pages() on DEBUG_PAGEALLOC and updating the call sites
accordingly.
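
For example, the memory hotplug conversion in the first patch replaces
the open-coded check with the new guarded helper:

-	if (debug_pagealloc_enabled_static())
-		kernel_map_pages(page, 1 << order, 1);
+	debug_pagealloc_map_pages(page, 1 << order, 1);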

[1] https://lore.kernel.org/lkml/2759b4bf-e1e3-d006-7d86-78a403482...@redhat.com

Mike Rapoport (4):
  mm: introduce debug_pagealloc_map_pages() helper
  PM: hibernate: improve robustness of mapping pages in the direct map
  arch, mm: restore dependency of __kernel_map_pages() on DEBUG_PAGEALLOC
  arch, mm: make kernel_page_present() always available

 arch/Kconfig                        |  3 +++
 arch/arm64/Kconfig                  |  4 +---
 arch/arm64/include/asm/cacheflush.h |  1 +
 arch/arm64/mm/pageattr.c            |  6 +++--
 arch/powerpc/Kconfig                |  5 +
 arch/riscv/Kconfig                  |  4 +---
 arch/riscv/include/asm/pgtable.h    |  2 --
 arch/riscv/include/asm/set_memory.h |  1 +
 arch/riscv/mm/pageattr.c            | 31 +
 arch/s390/Kconfig                   |  4 +---
 arch/sparc/Kconfig                  |  4 +---
 arch/x86/Kconfig                    |  4 +---
 arch/x86/include/asm/set_memory.h   |  1 +
 arch/x86/mm/pat/set_memory.c        |  4 ++--
 include/linux/mm.h                  | 35 +
 include/linux/set_memory.h          |  5 +
 kernel/power/snapshot.c             | 24 ++--
 mm/memory_hotplug.c                 |  3 +--
 mm/page_alloc.c                     |  6 ++---
 mm/slab.c                           |  8 +++
 20 files changed, 97 insertions(+), 58 deletions(-)

-- 
2.28.0