[PATCH] drm/i915: Fix i915_gem_wait_for_idle oops due to active_requests check

2018-12-20 Thread Bin Yang
i915_gem_wait_for_idle() waits for all outstanding requests to complete
and calls i915_retire_requests() to retire them. It assumes that
active_requests is zero once that is done.

i915_retire_requests() retires all requests on the active rings.
Unfortunately, active_requests is incremented in i915_request_alloc()
and decremented in i915_request_retire(), while the request is only
added to the active rings in i915_request_add().

If i915_gem_wait_for_idle() runs between i915_request_alloc() and
i915_request_add(), that request cannot be retired, so active_requests
never reaches zero.

Normally, i915_request_alloc() and i915_request_add() are called back
to back with drm.struct_mutex held. In intel_vgpu_create_workload(),
however, the request is pre-allocated and i915_request_add() is
deferred to the workload thread as a performance optimization, which
opens the window described above.

This patch introduces a new counter, reserved_requests, which tracks
request allocation. active_requests is now incremented only when the
request is actually added to the active rings.
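
A minimal standalone sketch of the intended counter split (plain userspace
C; request_alloc()/request_add()/request_retire() are hypothetical stand-ins
for the driver functions, whose real hunks follow in the truncated diff
below):

    #include <assert.h>

    static unsigned int reserved_requests; /* bumped at allocation time        */
    static unsigned int active_requests;   /* bumped only once on active rings */

    static void request_alloc(void)  { reserved_requests++; }
    static void request_add(void)    { active_requests++; }
    static void request_retire(void) { active_requests--; reserved_requests--; }

    int main(void)
    {
        request_alloc();  /* pre-allocation in intel_vgpu_create_workload()   */
        /* i915_gem_wait_for_idle() may run here: active_requests is still 0, */
        /* so the idle check no longer trips over the not-yet-added request.  */
        assert(active_requests == 0 && reserved_requests == 1);
        request_add();    /* later, from the workload thread                  */
        request_retire();
        assert(active_requests == 0 && reserved_requests == 0);
        return 0;
    }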

8<--- below is the oops hit when the above issue is triggered.

[2018-11-28 23:17:54] [12278.310417] kernel BUG at 
drivers/gpu/drm/i915/i915_gem.c:4702!
[2018-11-28 23:17:54] [12278.310802] invalid opcode:  [#1] PREEMPT SMP
[2018-11-28 23:17:54] [12278.311012] CPU: 0 PID: 61 Comm: kswapd0 Tainted: G
 U  WC 4.19.0-26.iot-lts2018-sos #1
[2018-11-28 23:17:54] [12278.311393] RIP: 
0010:i915_gem_wait_for_idle.part.78.cold.114+0x45/0x47
[2018-11-28 23:17:54] [12278.311675] Code: 7b 8b ae ff 48 8b 35 e6 92 3c 01 49 
c7 c0 af 48 55 a9 b9 5e 12 00 00 48 c7 c2 50 7a 0b a9 48 c7 c7 f4 e6 60 a8 e8 
37 38 b6 ff <0f> 0b 48 c7 c1 a8 59 55 a9 ba b8 12 00 00 48 c7 c6 20 7a 0b a9 48
[2018-11-28 23:17:55] [12278.312447] RSP: 0018:8e31acd8bbb8 EFLAGS: 00010246
[2018-11-28 23:17:55] [12278.312673] RAX: 000e RBX: 
000a RCX: 
[2018-11-28 23:17:55] [12278.312971] RDX: 0001 RSI: 
0008 RDI: 8e31ae841400
[2018-11-28 23:17:55] [12278.313268] RBP: 8e31acea8340 R08: 
01416578 R09: 8e31aea15000
[2018-11-28 23:17:55] [12278.313566] R10: 8e31ae807100 R11: 
8e31ae841400 R12: 8e31acea
[2018-11-28 23:17:55] [12278.313863] R13: 0b2ab1d38ed0 R14: 
 R15: 8e31acd8bd70
[2018-11-28 23:17:55] [12278.314162] FS:  () 
GS:8e31afa0() knlGS:
[2018-11-28 23:17:55] [12278.314499] CS:  0010 DS:  ES:  CR0: 
80050033
[2018-11-28 23:17:55] [12278.314741] CR2: 7ff94948f000 CR3: 
000226813000 CR4: 003406f0
[2018-11-28 23:17:55] [12278.315039] Call Trace:
[2018-11-28 23:17:55] [12278.315162]  i915_gem_shrink+0x3b7/0x4b0
[2018-11-28 23:17:55] [12278.315340]  i915_gem_shrinker_scan+0x104/0x130
[2018-11-28 23:17:55] [12278.315537]  do_shrink_slab+0x12c/0x2c0
[2018-11-28 23:17:55] [12278.315706]  shrink_slab+0x225/0x2c0
[2018-11-28 23:17:55] [12278.315864]  shrink_node+0xe4/0x430
[2018-11-28 23:17:55] [12278.316018]  kswapd+0x3ce/0x730
[2018-11-28 23:17:55] [12278.316161]  ? mem_cgroup_shrink_node+0x1a0/0x1a0
[2018-11-28 23:17:55] [12278.316365]  kthread+0x11e/0x140
[2018-11-28 23:17:55] [12278.316508]  ? kthread_create_worker_on_cpu+0x70/0x70
[2018-11-28 23:17:55] [12278.316727]  ret_from_fork+0x3a/0x50
[2018-11-28 23:17:55] [12278.316884] Modules linked in: igb_avb(C) xhci_pci 
xhci_hcd dca ici_isys_mod ipu4_acpi intel_ipu4_isys_csslib intel_ipu4_psys 
intel_ipu4_psys_csslib intel_ipu4_mmu intel_ipu4 iova crlmodule_lite

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109005
Signed-off-by: Bin Yang 
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem.c |  2 +-
 drivers/gpu/drm/i915/i915_request.c | 10 +++---
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 815db160b966..7a757f0f504f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1948,6 +1948,7 @@ struct drm_i915_private {
struct list_head active_rings;
struct list_head closed_vma;
u32 active_requests;
+   u32 reserved_requests;
u32 request_serial;
 
/**
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d92147ab4489..1873e21c84c1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -200,7 +200,7 @@ void i915_gem_unpark(struct drm_i915_private *i915)
GEM_TRACE("\n");
 
lockdep_assert_held(&i915->drm.struct_mutex);
-   GEM_BUG_ON(!i915->gt.active_requests);
+   GEM_BUG_ON(!i915->gt.reserved_requests);
 
if (i915->gt.awake)
return;
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 637909c59f1f..394283799ee9 10

[PATCH] pstore: fix incorrect persistent ram buffer mapping

2018-09-11 Thread Bin Yang
persistent_ram_vmap() returns the page start vaddr.
persistent_ram_iomap() supports non-page-aligned mapping.

persistent_ram_buffer_map() always adds offset-in-page to the vaddr
returned from these two functions, which causes incorrect mapping of
non-page-aligned persistent ram buffer.
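
For illustration, the address arithmetic with a made-up non-page-aligned
buffer address (standalone C; the address is an example, not from a real
system):

    /* Example: buffer at phys 0x8f901008. vmap() can only map from the page
     * start, so the in-page offset must be applied exactly once -- inside
     * persistent_ram_vmap() after this patch, not again by the caller.
     * iomap() already maps the exact (unaligned) start, so no offset there. */
    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096UL
    #define offset_in_page(p) ((unsigned long)(p) & (PAGE_SIZE - 1))

    int main(void)
    {
        uint64_t start = 0x8f901008ULL;               /* made-up address */
        uint64_t page_start = start & ~(uint64_t)(PAGE_SIZE - 1);

        printf("page start     : %#llx\n", (unsigned long long)page_start);
        printf("offset in page : %#lx\n", offset_in_page(start));
        return 0;
    }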

Signed-off-by: Bin Yang 
---
 fs/pstore/ram_core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 951a14e..7c05fdd 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -429,7 +429,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t 
size,
vaddr = vmap(pages, page_count, VM_MAP, prot);
kfree(pages);
 
-   return vaddr;
+   return vaddr + offset_in_page(start);
 }
 
 static void *persistent_ram_iomap(phys_addr_t start, size_t size,
@@ -468,7 +468,7 @@ static int persistent_ram_buffer_map(phys_addr_t start, 
phys_addr_t size,
return -ENOMEM;
}
 
-   prz->buffer = prz->vaddr + offset_in_page(start);
+   prz->buffer = prz->vaddr;
prz->buffer_size = size - sizeof(struct persistent_ram_buffer);
 
return 0;
@@ -515,7 +515,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
 
if (prz->vaddr) {
if (pfn_valid(prz->paddr >> PAGE_SHIFT)) {
-   vunmap(prz->vaddr);
+   vunmap(prz->vaddr - offset_in_page(prz->paddr));
} else {
iounmap(prz->vaddr);
release_mem_region(prz->paddr, prz->size);
-- 
2.7.4



[PATCH v3 4/5] x86/mm: optimize static_protection() by using overlap()

2018-08-20 Thread Bin Yang
When changing a 4K page attr inside the large page range,
try_preserve_large_page() will call static_protections() to check all
4K pages inside the large page range.

In the worst case, when changing a 4K page attr inside a 1G large page
range, static_protections() is called 262144 times, once for each
single 4K page check.

This can be optimized by checking for overlapping ranges instead of
looping over all pages: if the range being checked overlaps a specific
protection area, the corresponding protection flag is added.
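
As a standalone illustration of the range test (plain C, made-up example
ranges; the helper body mirrors the overlap() added in the diff below):

    #include <stdbool.h>
    #include <stdio.h>

    /* Same logic as the overlap() helper in the diff: half-open ranges. */
    static bool overlap(unsigned long start1, unsigned long end1,
                        unsigned long start2, unsigned long end2)
    {
        if (start1 <= start2 && end1 > start2)   /* start2 inside area 1? */
            return true;
        if (start2 <= start1 && end2 > start1)   /* start1 inside area 2? */
            return true;
        return false;
    }

    int main(void)
    {
        /* One O(1) test per protected area replaces up to 262144 per-page
         * within() calls for a 1G mapping.  Example ranges: */
        printf("%d\n", overlap(0x200000, 0x400000, 0x3f0000, 0x400000)); /* 1 */
        printf("%d\n", overlap(0x400000, 0x600000, 0x3f0000, 0x400000)); /* 0 */
        return 0;
    }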

Suggested-by: Dave Hansen 
Suggested-by: Shevchenko, Andriy 
Signed-off-by: Bin Yang 
---
 arch/x86/mm/pageattr.c | 50 +-
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f630eb4..fd90c5b 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -106,6 +106,21 @@ within_inclusive(unsigned long addr, unsigned long start, 
unsigned long end)
return addr >= start && addr <= end;
 }
 
+static inline bool
+overlap(unsigned long start1, unsigned long end1,
+   unsigned long start2, unsigned long end2)
+{
+   /* Is 'start2' within area 1? */
+   if (start1 <= start2 && end1 > start2)
+   return true;
+
+   /* Is 'start1' within area 2? */
+   if (start2 <= start1 && end2 > start1)
+   return true;
+
+   return false;
+}
+
 #ifdef CONFIG_X86_64
 
 static inline unsigned long highmap_start_pfn(void)
@@ -293,7 +308,7 @@ static void cpa_flush_array(unsigned long *start, int 
numpages, int cache,
  * checks and fixes these known static required protection bits.
  */
 static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
-  unsigned long pfn)
+  unsigned long len, unsigned long pfn)
 {
pgprot_t forbidden = __pgprot(0);
 
@@ -302,7 +317,9 @@ static inline pgprot_t static_protections(pgprot_t prot, 
unsigned long address,
 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
 */
 #ifdef CONFIG_PCI_BIOS
-   if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END 
>> PAGE_SHIFT))
+   if (pcibios_enabled &&
+   overlap(pfn, pfn + PFN_DOWN(len),
+   PFN_DOWN(BIOS_BEGIN), PFN_DOWN(BIOS_END)))
pgprot_val(forbidden) |= _PAGE_NX;
 #endif
 
@@ -311,7 +328,8 @@ static inline pgprot_t static_protections(pgprot_t prot, 
unsigned long address,
 * Does not cover __inittext since that is gone later on. On
 * 64bit we do not enforce !NX on the low mapping
 */
-   if (within(address, (unsigned long)_text, (unsigned long)_etext))
+   if (overlap(address, address + len,
+   (unsigned long)_text, (unsigned long)_etext))
pgprot_val(forbidden) |= _PAGE_NX;
 
/*
@@ -320,8 +338,9 @@ static inline pgprot_t static_protections(pgprot_t prot, 
unsigned long address,
 * so do not enforce until kernel_set_to_readonly is true.
 */
if (kernel_set_to_readonly &&
-   within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
-  __pa_symbol(__end_rodata) >> PAGE_SHIFT))
+   overlap(pfn, pfn + PFN_DOWN(len),
+   PFN_DOWN(__pa_symbol(__start_rodata)),
+   PFN_DOWN(__pa_symbol(__end_rodata))))
pgprot_val(forbidden) |= _PAGE_RW;
 
 #if defined(CONFIG_X86_64)
@@ -335,8 +354,8 @@ static inline pgprot_t static_protections(pgprot_t prot, 
unsigned long address,
 * at no extra cost.
 */
if (kernel_set_to_readonly &&
-   within(address, (unsigned long)_text,
-  (unsigned long)__end_rodata_hpage_align)) {
+   overlap(address, address + len, (unsigned long)_text,
+   (unsigned long)__end_rodata_hpage_align)) {
unsigned int level;
 
/*
@@ -375,19 +394,14 @@ static inline bool
 needs_static_protections(pgprot_t prot, unsigned long address,
unsigned long len, unsigned long pfn)
 {
-   int i;
+   pgprot_t chk_prot;
 
address &= PAGE_MASK;
len = PFN_ALIGN(len);
-   for (i = 0; i < (len >> PAGE_SHIFT); i++, address += PAGE_SIZE, pfn++) {
-   pgprot_t chk_prot = static_protections(prot, address, pfn);
-
-   if (pgprot_val(chk_prot) != pgprot_val(prot))
-   return true;
-   }
+   chk_prot = static_protections(prot, address, len, pfn);
 
/* Does static_protections() demand a change ? */
-   return false;
+   return pgprot_val(chk_prot) != pgprot_val(prot);
 }
 
 /*
@@ -650,7 +664,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
pfn = old_pfn + ((address & (psize - 1)) >&g

[PATCH v3 2/5] x86/mm: avoid static_protection() checking if not whole large page attr change

2018-08-20 Thread Bin Yang
The range check of whether the address is aligned to the large page and
covers the full large page (1G or 2M) should obviously be done _before_
the static_protections() check, because if the requested range does not
fit and has a different pgprot_val(), the code will decide to split
after doing all that check work anyway.
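
A standalone sketch of the early bail-out this patch adds (plain C,
illustrative values only):

    #include <stdio.h>

    int main(void)
    {
        unsigned long psize    = 2UL << 20;       /* 2M large page             */
        unsigned long pmask    = ~(psize - 1);
        unsigned long address  = 0x1234000UL;     /* requested start (example) */
        unsigned long numpages = 16;              /* requested 4K pages        */
        unsigned long full     = psize >> 12;     /* 4K pages per large page   */

        /* Not aligned to the large page, or not covering it fully: a split is
         * inevitable, so the static_protections() walk can be skipped. */
        if ((address & pmask) != address || numpages != full)
            printf("split, skip the protection walk\n");
        else
            printf("may preserve, run the protection checks\n");
        return 0;
    }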

The approach and some of the comments came from Thomas Gleixner's
email example for how to do this.

Suggested-by: Thomas Gleixner 
Signed-off-by: Bin Yang 
---
 arch/x86/mm/pageattr.c | 35 ---
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 68613fd..091f1d3 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -645,11 +645,21 @@ try_preserve_large_page(pte_t *kpte, unsigned long 
address,
}
 
/*
+* If the requested address range is not aligned to the start of
+* the large page or does not cover the full range, split it up.
+* No matter what the static_protections() check below does, it
+* would anyway result in a split after doing all the check work
+* for nothing.
+*/
+   addr = address & pmask;
+   if (address != addr || cpa->numpages != numpages)
+   goto out_unlock;
+
+   /*
 * We need to check the full range, whether
 * static_protection() requires a different pgprot for one of
 * the pages in the range we try to preserve:
 */
-   addr = address & pmask;
pfn = old_pfn;
for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
@@ -659,24 +669,11 @@ try_preserve_large_page(pte_t *kpte, unsigned long 
address,
}
 
 
-   /*
-* We need to change the attributes. Check, whether we can
-* change the large page in one go. We request a split, when
-* the address is not aligned and the number of pages is
-* smaller than the number of pages in the large page. Note
-* that we limited the number of possible pages already to
-* the number of pages in the large page.
-*/
-   if (address == (address & pmask) && cpa->numpages == (psize >> 
PAGE_SHIFT)) {
-   /*
-* The address is aligned and the number of pages
-* covers the full page.
-*/
-   new_pte = pfn_pte(old_pfn, new_prot);
-   __set_pmd_pte(kpte, address, new_pte);
-   cpa->flags |= CPA_FLUSHTLB;
-   do_split = 0;
-   }
+   /* All checks passed. Just change the large mapping entry */
+   new_pte = pfn_pte(old_pfn, new_prot);
+   __set_pmd_pte(kpte, address, new_pte);
+   cpa->flags |= CPA_FLUSHTLB;
+   do_split = 0;
 
 out_unlock:
spin_unlock(&pgd_lock);
-- 
2.7.4



[PATCH v3 3/5] x86/mm: add help function to check specific protection flags in range

2018-08-20 Thread Bin Yang
Introduce the needs_static_protections() helper to check specific
protection flags in a range. It calls static_protections() to check
whether any part of the address/len range is forced to change from 'prot'.
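
A standalone model of the helper's contract (plain C; check_one() is a
stand-in for static_protections(), and the protected range is made up):

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    /* Pretend pages in [0x600000, 0x800000) must drop one bit from 'prot'. */
    static unsigned long check_one(unsigned long prot, unsigned long addr)
    {
        return (addr >= 0x600000 && addr < 0x800000) ? (prot & ~1UL) : prot;
    }

    /* Report whether any page of [address, address + len) is forced away
     * from 'prot' -- the question needs_static_protections() answers. */
    static bool needs_change(unsigned long prot, unsigned long address,
                             unsigned long len)
    {
        for (; len; len -= PAGE_SIZE, address += PAGE_SIZE)
            if (check_one(prot, address) != prot)
                return true;
        return false;
    }

    int main(void)
    {
        printf("%d\n", needs_change(0x3, 0x400000, 0x200000)); /* 0 */
        printf("%d\n", needs_change(0x3, 0x400000, 0x400000)); /* 1 */
        return 0;
    }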

Suggested-by: Dave Hansen 
Signed-off-by: Bin Yang 
---
 arch/x86/mm/pageattr.c | 36 +++-
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 091f1d3..f630eb4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -367,6 +367,30 @@ static inline pgprot_t static_protections(pgprot_t prot, 
unsigned long address,
 }
 
 /*
+ * static_protections() "forces" page protections for some address
+ * ranges.  Return true if any part of the address/len range is forced
+ * to change from 'prot'.
+ */
+static inline bool
+needs_static_protections(pgprot_t prot, unsigned long address,
+   unsigned long len, unsigned long pfn)
+{
+   int i;
+
+   address &= PAGE_MASK;
+   len = PFN_ALIGN(len);
+   for (i = 0; i < (len >> PAGE_SHIFT); i++, address += PAGE_SIZE, pfn++) {
+   pgprot_t chk_prot = static_protections(prot, address, pfn);
+
+   if (pgprot_val(chk_prot) != pgprot_val(prot))
+   return true;
+   }
+
+   /* Does static_protections() demand a change ? */
+   return false;
+}
+
+/*
  * Lookup the page table entry for a virtual address in a specific pgd.
  * Return a pointer to the entry and the level of the mapping.
  */
@@ -556,7 +580,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot, req_prot;
-   int i, do_split = 1;
+   int do_split = 1;
enum pg_level level;
 
if (cpa->force_split)
@@ -660,14 +684,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 * static_protection() requires a different pgprot for one of
 * the pages in the range we try to preserve:
 */
-   pfn = old_pfn;
-   for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
-   pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
-
-   if (pgprot_val(chk_prot) != pgprot_val(new_prot))
-   goto out_unlock;
-   }
-
+   if (needs_static_protections(new_prot, addr, psize, old_pfn))
+   goto out_unlock;
 
/* All checks passed. Just change the large mapping entry */
new_pte = pfn_pte(old_pfn, new_prot);
-- 
2.7.4



[PATCH v3 1/5] x86/mm: avoid redundant checking if pgprot has no change

2018-08-20 Thread Bin Yang
In try_preserve_large_page(), the check for
pgprot_val(new_prot) == pgprot_val(old_prot) can safely be done first
to avoid redundant checking.

The approach and some of the comments came from Thomas Gleixner's
email example for how to do this.

Suggested-by: Thomas Gleixner 
Signed-off-by: Bin Yang 
---
 arch/x86/mm/pageattr.c | 24 
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8d6c34f..68613fd 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -629,6 +629,22 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
new_prot = static_protections(req_prot, address, pfn);
 
/*
+* The static_protections() is used to check specific protection flags
+* for certain areas of memory. The old pgprot should be checked already
+* when it was applied before. If it's not, then this is a bug in some
+* other code and needs to be fixed there.
+*
+* If new pgprot is same as old pgprot, return directly without any
+* additional checking. The following static_protections() checking is
+* pointless if pgprot has no change. It can avoid the redundant
+* checking and optimize the performance of large page split checking.
+*/
+   if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
+   do_split = 0;
+   goto out_unlock;
+   }
+
+   /*
 * We need to check the full range, whether
 * static_protection() requires a different pgprot for one of
 * the pages in the range we try to preserve:
@@ -642,14 +658,6 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
goto out_unlock;
}
 
-   /*
-* If there are no changes, return. maxpages has been updated
-* above:
-*/
-   if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
-   do_split = 0;
-   goto out_unlock;
-   }
 
/*
 * We need to change the attributes. Check, whether we can
-- 
2.7.4



[PATCH v3 0/5] x86/mm: fix cpu stuck issue in __change_page_attr_set_clr

2018-08-20 Thread Bin Yang


A problem was found while optimizing x86 kernel boot time: free_initmem()
sometimes takes about 600ms, which is far too long for fast boot.

When changing a 4K page attr inside a large page range,
__change_page_attr() calls try_preserve_large_page() to decide whether
to split the big page. try_preserve_large_page() in turn calls
static_protections() to check every 4K page inside the large page
range, one by one.

free_initmem()  <-- free N pages
  free_init_pages()
set_memory_rw()
  change_page_attr_set()
change_page_attr_set_clr()
  __change_page_attr_set_clr()
__change_page_attr() <-- loop N times
  try_preserve_large_page()
static_protections() <-- worst case: loop (1G/4K = 262144) * N 
times

The problem is, most of the 256K * N calls of static_protections()
are _not_ necessary at all, as pointed out by Thomas Gleixner:

"The current code does the static_protection() check loop _before_ checking:

  1) Whether the new and the old pgprot are the same
  
  2) Whether the address range which needs to be changed is aligned to and
 covers the full large mapping

It does the static_protections() loop before #1 and #2 which can be
optimized."

The static_protections() check loop can be optimized to check for overlapping
ranges and then check explicitly for those without any looping.
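
A back-of-the-envelope comparison of the two approaches (plain C; the
number of protected areas is a rough count, not taken from the code):

    #include <stdio.h>

    int main(void)
    {
        unsigned long huge = 1UL << 30, page = 1UL << 12;
        unsigned long per_page_calls  = huge / page;  /* 262144 per 1G mapping */
        unsigned long protected_areas = 4;            /* BIOS, text, rodata... */

        printf("per-page within() checks : %lu\n", per_page_calls);
        printf("overlap() checks         : %lu\n", protected_areas);
        return 0;
    }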

Here are 5 patches for these optimizations:

Patch 1: check whether new pgprot is same as old pgprot first to avoid
 unnecessary static_protection() checking.

Patch 2: check whether it is whole large page attr change first to avoid
 unnecessary static_protection() checking.

Patch 3: add help function to check specific protection flags in range

Patch 4: Optimize the static_protection() by using overlap() check instead
 of within()

Patch 5: Add a check for catching a case where the existing mapping is wrong
 already

The approach and some of the comments came from Thomas's email example for how
to do this. Thanks again for Thomas's kind help.

Thanks,
Bin

Bin Yang (5):
  x86/mm: avoid redundant checking if pgprot has no change
  x86/mm: avoid static_protection() checking if not whole large page
attr change
  x86/mm: add help function to check specific protection flags in range
  x86/mm: optimize static_protection() by using overlap()
  x86/mm: add WARN_ON_ONCE() for wrong large page mapping

 arch/x86/mm/pageattr.c | 127 +
 1 file changed, 86 insertions(+), 41 deletions(-)

-- 
2.7.4



[PATCH v3 5/5] x86/mm: add WARN_ON_ONCE() for wrong large page mapping

2018-08-20 Thread Bin Yang
If there is a large page which contains an area that requires a
different mapping than the one the large page provides, then something
went wrong _before_ this code is called.

Here we can catch a case where the existing mapping is wrong
already.

Inspired-by: Thomas Gleixner 
Signed-off-by: Bin Yang 
---
 arch/x86/mm/pageattr.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index fd90c5b..91a250c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -625,6 +625,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 
psize = page_level_size(level);
pmask = page_level_mask(level);
+   addr = address & pmask;
 
/*
 * Calculate the number of pages, which fit into this large
@@ -636,6 +637,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
cpa->numpages = numpages;
 
/*
+* The old pgprot should not have any protection bit. Otherwise,
+* the existing mapping is wrong already.
+*/
+   WARN_ON_ONCE(needs_static_protections(old_prot, addr, psize, old_pfn));
+
+   /*
 * We are safe now. Check whether the new pgprot is the same:
 * Convert protection attributes to 4k-format, as cpa->mask* are set
 * up accordingly.
@@ -690,7 +697,6 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 * would anyway result in a split after doing all the check work
 * for nothing.
 */
-   addr = address & pmask;
if (address != addr || cpa->numpages != numpages)
goto out_unlock;
 
-- 
2.7.4



[PATCH v2] x86/mm: fix cpu stuck issue in __change_page_attr_set_clr

2018-07-04 Thread Bin Yang
When changing a 4K page attr inside a 1G/2M large page range,
__change_page_attr() calls try_preserve_large_page() to decide whether
to split the big page. try_preserve_large_page() then calls
static_protections() to check all 4K pages inside the large page range
one by one. This check loop is very inefficient: in the worst case,
static_protections() is called 1G/4K (262144) times.

This issue can be triggered by free_initmem() during kernel boot.
free_initmem()  <-- free N pages
  free_init_pages()
set_memory_rw()
  change_page_attr_set()
change_page_attr_set_clr()
  __change_page_attr_set_clr()
__change_page_attr() <-- loop N times
  try_preserve_large_page()
static_protections() <-- worst case: loop 262144 * N times

Instead of checking every page, it is enough to check one page per
overlapping range. This patch enhances static_protections() to also
return the number of following pages that fall in the same overlapping
range with the same protection flags, which reduces the check loop
from 262144 iterations to fewer than 10.
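
The hunk that consumes the returned count is cut off in this archive copy;
a standalone sketch of the idea (plain C, the names and the example
protected range are assumptions, not the actual patch):

    #include <stdio.h>

    /* Stand-in for static_protections(): report the verdict for 'page' and,
     * via *run, how many following pages share that verdict.  Pretend pages
     * [16, 32) of the range are one protected area. */
    static int check_page(unsigned long page, unsigned long total,
                          unsigned long *run)
    {
        if (page >= 16 && page < 32) { *run = 32 - page; return 1; }
        *run = (page < 16) ? 16 - page : total - page;
        return 0;
    }

    int main(void)
    {
        unsigned long total = 262144, page = 0, calls = 0, run;

        while (page < total) {
            calls++;
            check_page(page, total, &run); /* one verdict covers 'run' pages */
            page += run;
        }
        printf("%lu checks instead of %lu\n", calls, total); /* 3 vs 262144 */
        return 0;
    }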

Signed-off-by: Bin Yang 
---
 arch/x86/mm/pageattr.c | 72 --
 1 file changed, 58 insertions(+), 14 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 3bded76e..dee23ef 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -292,17 +292,27 @@ static void cpa_flush_array(unsigned long *start, int 
numpages, int cache,
  * checks and fixes these known static required protection bits.
  */
 static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
-  unsigned long pfn)
+  unsigned long pfn, unsigned long *page_num)
 {
pgprot_t forbidden = __pgprot(0);
+   unsigned long tmp;
+   unsigned long num = PUD_PAGE_SIZE >> PAGE_SHIFT;
 
/*
 * The BIOS area between 640k and 1Mb needs to be executable for
 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
 */
 #ifdef CONFIG_PCI_BIOS
-   if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END 
>> PAGE_SHIFT))
-   pgprot_val(forbidden) |= _PAGE_NX;
+   if (pcibios_enabled) {
+   tmp = (BIOS_BEGIN >> PAGE_SHIFT) > pfn ?
+   (BIOS_BEGIN >> PAGE_SHIFT) - pfn : ULONG_MAX;
+   if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT,
+   BIOS_END >> PAGE_SHIFT)) {
+   pgprot_val(forbidden) |= _PAGE_NX;
+   tmp = (BIOS_END >> PAGE_SHIFT) - pfn;
+   }
+   num = num > tmp ? tmp : num;
+   }
 #endif
 
/*
@@ -310,18 +320,30 @@ static inline pgprot_t static_protections(pgprot_t prot, 
unsigned long address,
 * Does not cover __inittext since that is gone later on. On
 * 64bit we do not enforce !NX on the low mapping
 */
-   if (within(address, (unsigned long)_text, (unsigned long)_etext))
+   tmp = (unsigned long)_text > address ?
+   ((unsigned long)_text - address) >> PAGE_SHIFT : ULONG_MAX;
+   if (within(address, (unsigned long)_text, (unsigned long)_etext)) {
pgprot_val(forbidden) |= _PAGE_NX;
+   tmp = ((unsigned long)_etext - address) >> PAGE_SHIFT;
+   }
+   num = num > tmp ? tmp : num;
 
/*
 * The .rodata section needs to be read-only. Using the pfn
 * catches all aliases.  This also includes __ro_after_init,
 * so do not enforce until kernel_set_to_readonly is true.
 */
-   if (kernel_set_to_readonly &&
-   within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
-  __pa_symbol(__end_rodata) >> PAGE_SHIFT))
-   pgprot_val(forbidden) |= _PAGE_RW;
+   if (kernel_set_to_readonly) {
+   tmp = (__pa_symbol(__start_rodata) >> PAGE_SHIFT) > pfn ?
+   (__pa_symbol(__start_rodata) >> PAGE_SHIFT) - pfn :
+   ULONG_MAX;
+   if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
+   __pa_symbol(__end_rodata) >> PAGE_SHIFT)) {
+   pgprot_val(forbidden) |= _PAGE_RW;
+   tmp = (__pa_symbol(__end_rodata) >> PAGE_SHIFT) - pfn;
+   }
+   num = num > tmp ? tmp : num;
+   }
 
 #if defined(CONFIG_X86_64)
/*
@@ -333,11 +355,15 @@ static inline pgprot_t static_protections(pgprot_t prot, 
unsigned long address,
 * This will preserve the large page mappings for kernel text/data
 * at no extra cost.
 */
+   tmp = kernel_set_to_readonly && (unsigned long)_text > address ?
+   ((unsigned long)_text - a

[PATCH] x86/mm: fix cpu stuck issue in __change_page_attr_set_clr

2018-06-28 Thread Bin Yang
This issue can be easily triggered by the free_initmem() function on an
x86_64 CPU.

When changing page attributes, __change_page_attr_set_clr() calls
__change_page_attr() for every 4K page, and try_preserve_large_page()
is called to check whether the large page needs to be split.

If the CPU supports "pdpe1gb", the kernel will try to use 1G large
pages. In the worst case, try_preserve_large_page() has to check every
4K page in a 1G range. If __change_page_attr_set_clr() needs to change
lots of 4K pages, the CPU will be stuck for a long time.

This patch caches the last address that was checked. If the next
address falls within the same big page, the cached result is reused
without a full range check.
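
A standalone model of the caching idea (plain C; the real patch below also
compares the old and new pgprot before trusting the cache):

    #include <stdio.h>

    #define HPAGE_MASK (~((2UL << 20) - 1))   /* 2M large page */

    static unsigned long cached_addr;
    static int cached_split = -1;             /* -1: cache empty */
    static unsigned long full_checks;

    static int full_range_check(unsigned long addr)
    {
        full_checks++;        /* the expensive per-4K walk in the real code */
        return 0;             /* pretend the large page can be preserved    */
    }

    static int try_preserve(unsigned long addr)
    {
        if (cached_split >= 0 &&
            (addr & HPAGE_MASK) == (cached_addr & HPAGE_MASK))
            return cached_split;              /* same big page: reuse verdict */
        cached_addr  = addr;
        cached_split = full_range_check(addr);
        return cached_split;
    }

    int main(void)
    {
        for (unsigned long a = 0x200000; a < 0x400000; a += 0x1000)
            try_preserve(a);
        printf("full checks: %lu for 512 calls\n", full_checks); /* prints 1 */
        return 0;
    }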

Signed-off-by: Bin Yang 
---
 arch/x86/mm/pageattr.c | 26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 3bded76e..b9241ac 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -552,16 +552,20 @@ static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
 {
+   static unsigned long address_cache;
+   static unsigned long do_split_cache = 1;
unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot, req_prot;
int i, do_split = 1;
enum pg_level level;
 
-   if (cpa->force_split)
+   spin_lock(&pgd_lock);
+   if (cpa->force_split) {
+   do_split_cache = 1;
return 1;
+   }
 
-   spin_lock(&pgd_lock);
/*
 * Check for races, another CPU might have split this page
 * up already:
@@ -627,13 +631,25 @@ try_preserve_large_page(pte_t *kpte, unsigned long 
address,
 
new_prot = static_protections(req_prot, address, pfn);
 
+   addr = address & pmask;
+   pfn = old_pfn;
+   /*
+* If an address in same range had been checked just now, re-use the
+* cache value without full range check. In the worst case, it needs to
+* check every 4K page in 1G range, which causes cpu stuck for long
+* time.
+*/
+   if (!do_split_cache &&
+   address_cache >= addr && address_cache < nextpage_addr &&
+   pgprot_val(new_prot) == pgprot_val(old_prot)) {
+   do_split = do_split_cache;
+   goto out_unlock;
+   }
/*
 * We need to check the full range, whether
 * static_protection() requires a different pgprot for one of
 * the pages in the range we try to preserve:
 */
-   addr = address & pmask;
-   pfn = old_pfn;
for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
 
@@ -670,6 +686,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
}
 
 out_unlock:
+   address_cache = address;
+   do_split_cache = do_split;
spin_unlock(&pgd_lock);
 
return do_split;
-- 
2.7.4


