[PATCH] drm/i915: Fix i915_gem_wait_for_idle oops due to active_requests check
i915_gem_wait_for_idle() waits for all requests to complete and calls i915_retire_requests() to retire them. It assumes the active_requests should finally be zero. In i915_retire_requests(), it will retire all requests on the active rings. Unfortunately, active_requests is increased in i915_request_alloc() and reduced in i915_request_retire(), but the request is added into active rings in i915_request_add(). If i915_gem_wait_for_idle() is called between i915_request_alloc() and i915_request_add(), this request will not be retired. Then, the active_requests will not be zero in the end. Normally, i915_request_alloc() and i915_request_add() will be called in sequence with drm.struct_mutex locked. But in intel_vgpu_create_workload(), it will pre-allocate the request and call i915_request_add() in the workload thread for performance optimization. The above issue will be triggered. This patch introduces a new counter named reserved_requests for request allocation. The active_requests will be increased when the request is really added into the active rings. 8<- below is the oops when the above issue is hit. [2018-11-28 23:17:54] [12278.310417] kernel BUG at drivers/gpu/drm/i915/i915_gem.c:4702! 
[2018-11-28 23:17:54] [12278.310802] invalid opcode: [#1] PREEMPT SMP [2018-11-28 23:17:54] [12278.311012] CPU: 0 PID: 61 Comm: kswapd0 Tainted: G U WC4.19.0-26.iot-lts2018-sos #1 [2018-11-28 23:17:54] [12278.311393] RIP: 0010:i915_gem_wait_for_idle.part.78.cold.114+0x45/0x47 [2018-11-28 23:17:54] [12278.311675] Code: 7b 8b ae ff 48 8b 35 e6 92 3c 01 49 c7 c0 af 48 55 a9 b9 5e 12 00 00 48 c7 c2 50 7a 0b a9 48 c7 c7 f4 e6 60 a8 e8 37 38 b6 ff <0f> 0b 48 c7 c1 a8 59 55 a9 ba b8 12 00 00 48 c7 c6 20 7a 0b a9 48 [2018-11-28 23:17:55] [12278.312447] RSP: 0018:8e31acd8bbb8 EFLAGS: 00010246 [2018-11-28 23:17:55] [12278.312673] RAX: 000e RBX: 000a RCX: [2018-11-28 23:17:55] [12278.312971] RDX: 0001 RSI: 0008 RDI: 8e31ae841400 [2018-11-28 23:17:55] [12278.313268] RBP: 8e31acea8340 R08: 01416578 R09: 8e31aea15000 [2018-11-28 23:17:55] [12278.313566] R10: 8e31ae807100 R11: 8e31ae841400 R12: 8e31acea [2018-11-28 23:17:55] [12278.313863] R13: 0b2ab1d38ed0 R14: R15: 8e31acd8bd70 [2018-11-28 23:17:55] [12278.314162] FS: () GS:8e31afa0() knlGS: [2018-11-28 23:17:55] [12278.314499] CS: 0010 DS: ES: CR0: 80050033 [2018-11-28 23:17:55] [12278.314741] CR2: 7ff94948f000 CR3: 000226813000 CR4: 003406f0 [2018-11-28 23:17:55] [12278.315039] Call Trace: [2018-11-28 23:17:55] [12278.315162] i915_gem_shrink+0x3b7/0x4b0 [2018-11-28 23:17:55] [12278.315340] i915_gem_shrinker_scan+0x104/0x130 [2018-11-28 23:17:55] [12278.315537] do_shrink_slab+0x12c/0x2c0 [2018-11-28 23:17:55] [12278.315706] shrink_slab+0x225/0x2c0 [2018-11-28 23:17:55] [12278.315864] shrink_node+0xe4/0x430 [2018-11-28 23:17:55] [12278.316018] kswapd+0x3ce/0x730 [2018-11-28 23:17:55] [12278.316161] ? mem_cgroup_shrink_node+0x1a0/0x1a0 [2018-11-28 23:17:55] [12278.316365] kthread+0x11e/0x140 [2018-11-28 23:17:55] [12278.316508] ? 
kthread_create_worker_on_cpu+0x70/0x70 [2018-11-28 23:17:55] [12278.316727] ret_from_fork+0x3a/0x50 [2018-11-28 23:17:55] [12278.316884] Modules linked in: igb_avb(C) xhci_pci xhci_hcd dca ici_isys_mod ipu4_acpi intel_ipu4_isys_csslib intel_ipu4_psys intel_ipu4_psys_csslib intel_ipu4_mmu intel_ipu4 iova crlmodule_lite Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109005 Signed-off-by: Bin Yang --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 10 +++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 815db160b966..7a757f0f504f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1948,6 +1948,7 @@ struct drm_i915_private { struct list_head active_rings; struct list_head closed_vma; u32 active_requests; + u32 reserved_requests; u32 request_serial; /** diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d92147ab4489..1873e21c84c1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -200,7 +200,7 @@ void i915_gem_unpark(struct drm_i915_private *i915) GEM_TRACE("\n"); lockdep_assert_held(>drm.struct_mutex); - GEM_BUG_ON(!i915->gt.active_requests); + GEM_BUG_ON(!i915->gt.reserved_requests); if (i915->gt.awake) return; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 637909c59f1f..394283799ee9 10
[PATCH] pstore: fix incorrect persistent ram buffer mapping
persistent_ram_vmap() returns the page start vaddr. persistent_ram_iomap() supports non-page-aligned mapping. persistent_ram_buffer_map() always adds offset-in-page to the vaddr returned from these two functions, which causes incorrect mapping of non-page-aligned persistent ram buffer. Signed-off-by: Bin Yang --- fs/pstore/ram_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 951a14e..7c05fdd 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -429,7 +429,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size, vaddr = vmap(pages, page_count, VM_MAP, prot); kfree(pages); - return vaddr; + return vaddr + offset_in_page(start); } static void *persistent_ram_iomap(phys_addr_t start, size_t size, @@ -468,7 +468,7 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, return -ENOMEM; } - prz->buffer = prz->vaddr + offset_in_page(start); + prz->buffer = prz->vaddr; prz->buffer_size = size - sizeof(struct persistent_ram_buffer); return 0; @@ -515,7 +515,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz) if (prz->vaddr) { if (pfn_valid(prz->paddr >> PAGE_SHIFT)) { - vunmap(prz->vaddr); + vunmap(prz->vaddr - offset_in_page(prz->paddr)); } else { iounmap(prz->vaddr); release_mem_region(prz->paddr, prz->size); -- 2.7.4
[PATCH] pstore: fix incorrect persistent ram buffer mapping
persistent_ram_vmap() returns the page start vaddr. persistent_ram_iomap() supports non-page-aligned mapping. persistent_ram_buffer_map() always adds offset-in-page to the vaddr returned from these two functions, which causes incorrect mapping of non-page-aligned persistent ram buffer. Signed-off-by: Bin Yang --- fs/pstore/ram_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 951a14e..7c05fdd 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -429,7 +429,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size, vaddr = vmap(pages, page_count, VM_MAP, prot); kfree(pages); - return vaddr; + return vaddr + offset_in_page(start); } static void *persistent_ram_iomap(phys_addr_t start, size_t size, @@ -468,7 +468,7 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, return -ENOMEM; } - prz->buffer = prz->vaddr + offset_in_page(start); + prz->buffer = prz->vaddr; prz->buffer_size = size - sizeof(struct persistent_ram_buffer); return 0; @@ -515,7 +515,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz) if (prz->vaddr) { if (pfn_valid(prz->paddr >> PAGE_SHIFT)) { - vunmap(prz->vaddr); + vunmap(prz->vaddr - offset_in_page(prz->paddr)); } else { iounmap(prz->vaddr); release_mem_region(prz->paddr, prz->size); -- 2.7.4
[PATCH v3 4/5] x86/mm: optimize static_protection() by using overlap()
When changing a 4K page attr inside the large page range, try_preserve_large_page() will call static_protections() to check all 4K pages inside the large page range. In the worst case, when changing a 4K page attr inside 1G large page range, static_protections() will be called for 262144 times for single 4K page check. It can be optimized to check for overlapping ranges instead of looping all pages. If the checking range has overlap with a specific protection area, it should add the corresponding protection flag. Suggested-by: Dave Hansen Suggested-by: Shevchenko, Andriy Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 50 +- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f630eb4..fd90c5b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -106,6 +106,21 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end) return addr >= start && addr <= end; } +static inline bool +overlap(unsigned long start1, unsigned long end1, + unsigned long start2, unsigned long end2) +{ + /* Is 'start2' within area 1? */ + if (start1 <= start2 && end1 > start2) + return true; + + /* Is 'start1' within area 2? */ + if (start2 <= start1 && end2 > start1) + return true; + + return false; +} + #ifdef CONFIG_X86_64 static inline unsigned long highmap_start_pfn(void) @@ -293,7 +308,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, * checks and fixes these known static required protection bits. */ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, - unsigned long pfn) + unsigned long len, unsigned long pfn) { pgprot_t forbidden = __pgprot(0); @@ -302,7 +317,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. 
*/ #ifdef CONFIG_PCI_BIOS - if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) + if (pcibios_enabled && + overlap(pfn, pfn + PFN_DOWN(len), + PFN_DOWN(BIOS_BEGIN), PFN_DOWN(BIOS_END))) pgprot_val(forbidden) |= _PAGE_NX; #endif @@ -311,7 +328,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * Does not cover __inittext since that is gone later on. On * 64bit we do not enforce !NX on the low mapping */ - if (within(address, (unsigned long)_text, (unsigned long)_etext)) + if (overlap(address, address + len, + (unsigned long)_text, (unsigned long)_etext)) pgprot_val(forbidden) |= _PAGE_NX; /* @@ -320,8 +338,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * so do not enforce until kernel_set_to_readonly is true. */ if (kernel_set_to_readonly && - within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, - __pa_symbol(__end_rodata) >> PAGE_SHIFT)) + overlap(pfn, pfn + PFN_DOWN(len), + PFN_DOWN(__pa_symbol(__start_rodata)), + PFN_DOWN(__pa_symbol(__end_rodata pgprot_val(forbidden) |= _PAGE_RW; #if defined(CONFIG_X86_64) @@ -335,8 +354,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * at no extra cost. 
*/ if (kernel_set_to_readonly && - within(address, (unsigned long)_text, - (unsigned long)__end_rodata_hpage_align)) { + overlap(address, address + len, (unsigned long)_text, + (unsigned long)__end_rodata_hpage_align)) { unsigned int level; /* @@ -375,19 +394,14 @@ static inline bool needs_static_protections(pgprot_t prot, unsigned long address, unsigned long len, unsigned long pfn) { - int i; + pgprot_t chk_prot; address &= PAGE_MASK; len = PFN_ALIGN(len); - for (i = 0; i < (len >> PAGE_SHIFT); i++, address += PAGE_SIZE, pfn++) { - pgprot_t chk_prot = static_protections(prot, address, pfn); - - if (pgprot_val(chk_prot) != pgprot_val(prot)) - return true; - } + chk_prot = static_protections(prot, address, len, pfn); /* Does static_protections() demand a change ? */ - return false; + return pgprot_val(chk_prot) != pgprot_val(prot); } /* @@ -650,7 +664,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, pfn = old_pfn + ((address & (psize - 1)) >&g
[PATCH v3 4/5] x86/mm: optimize static_protection() by using overlap()
When changing a 4K page attr inside the large page range, try_preserve_large_page() will call static_protections() to check all 4K pages inside the large page range. In the worst case, when changing a 4K page attr inside 1G large page range, static_protections() will be called for 262144 times for single 4K page check. It can be optimized to check for overlapping ranges instead of looping all pages. If the checking range has overlap with a specific protection area, it should add the corresponding protection flag. Suggested-by: Dave Hansen Suggested-by: Shevchenko, Andriy Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 50 +- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f630eb4..fd90c5b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -106,6 +106,21 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end) return addr >= start && addr <= end; } +static inline bool +overlap(unsigned long start1, unsigned long end1, + unsigned long start2, unsigned long end2) +{ + /* Is 'start2' within area 1? */ + if (start1 <= start2 && end1 > start2) + return true; + + /* Is 'start1' within area 2? */ + if (start2 <= start1 && end2 > start1) + return true; + + return false; +} + #ifdef CONFIG_X86_64 static inline unsigned long highmap_start_pfn(void) @@ -293,7 +308,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, * checks and fixes these known static required protection bits. */ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, - unsigned long pfn) + unsigned long len, unsigned long pfn) { pgprot_t forbidden = __pgprot(0); @@ -302,7 +317,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. 
*/ #ifdef CONFIG_PCI_BIOS - if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) + if (pcibios_enabled && + overlap(pfn, pfn + PFN_DOWN(len), + PFN_DOWN(BIOS_BEGIN), PFN_DOWN(BIOS_END))) pgprot_val(forbidden) |= _PAGE_NX; #endif @@ -311,7 +328,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * Does not cover __inittext since that is gone later on. On * 64bit we do not enforce !NX on the low mapping */ - if (within(address, (unsigned long)_text, (unsigned long)_etext)) + if (overlap(address, address + len, + (unsigned long)_text, (unsigned long)_etext)) pgprot_val(forbidden) |= _PAGE_NX; /* @@ -320,8 +338,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * so do not enforce until kernel_set_to_readonly is true. */ if (kernel_set_to_readonly && - within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, - __pa_symbol(__end_rodata) >> PAGE_SHIFT)) + overlap(pfn, pfn + PFN_DOWN(len), + PFN_DOWN(__pa_symbol(__start_rodata)), + PFN_DOWN(__pa_symbol(__end_rodata pgprot_val(forbidden) |= _PAGE_RW; #if defined(CONFIG_X86_64) @@ -335,8 +354,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * at no extra cost. 
*/ if (kernel_set_to_readonly && - within(address, (unsigned long)_text, - (unsigned long)__end_rodata_hpage_align)) { + overlap(address, address + len, (unsigned long)_text, + (unsigned long)__end_rodata_hpage_align)) { unsigned int level; /* @@ -375,19 +394,14 @@ static inline bool needs_static_protections(pgprot_t prot, unsigned long address, unsigned long len, unsigned long pfn) { - int i; + pgprot_t chk_prot; address &= PAGE_MASK; len = PFN_ALIGN(len); - for (i = 0; i < (len >> PAGE_SHIFT); i++, address += PAGE_SIZE, pfn++) { - pgprot_t chk_prot = static_protections(prot, address, pfn); - - if (pgprot_val(chk_prot) != pgprot_val(prot)) - return true; - } + chk_prot = static_protections(prot, address, len, pfn); /* Does static_protections() demand a change ? */ - return false; + return pgprot_val(chk_prot) != pgprot_val(prot); } /* @@ -650,7 +664,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, pfn = old_pfn + ((address & (psize - 1)) >&g
[PATCH v3 2/5] x86/mm: avoid static_protection() checking if not whole large page attr change
The range check whether the address is aligned to the large page and covers the full large page (1G or 2M) is obvious to do _before_ static_protection() check, because if the requested range does not fit and has a different pgprot_val() then it will decide to split after the check anyway. The approach and some of the comments came from Thomas Gleixner's email example for how to do this Suggested-by: Thomas Gleixner Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 35 --- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 68613fd..091f1d3 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -645,11 +645,21 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, } /* +* If the requested address range is not aligned to the start of +* the large page or does not cover the full range, split it up. +* No matter what the static_protections() check below does, it +* would anyway result in a split after doing all the check work +* for nothing. +*/ + addr = address & pmask; + if (address != addr || cpa->numpages != numpages) + goto out_unlock; + + /* * We need to check the full range, whether * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: */ - addr = address & pmask; pfn = old_pfn; for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { pgprot_t chk_prot = static_protections(req_prot, addr, pfn); @@ -659,24 +669,11 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, } - /* -* We need to change the attributes. Check, whether we can -* change the large page in one go. We request a split, when -* the address is not aligned and the number of pages is -* smaller than the number of pages in the large page. Note -* that we limited the number of possible pages already to -* the number of pages in the large page. 
-*/ - if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) { - /* -* The address is aligned and the number of pages -* covers the full page. -*/ - new_pte = pfn_pte(old_pfn, new_prot); - __set_pmd_pte(kpte, address, new_pte); - cpa->flags |= CPA_FLUSHTLB; - do_split = 0; - } + /* All checks passed. Just change the large mapping entry */ + new_pte = pfn_pte(old_pfn, new_prot); + __set_pmd_pte(kpte, address, new_pte); + cpa->flags |= CPA_FLUSHTLB; + do_split = 0; out_unlock: spin_unlock(_lock); -- 2.7.4
[PATCH v3 3/5] x86/mm: add help function to check specific protection flags in range
Introduce the needs_static_protections() helper to check specific protection flags in range. It calls static_protection() to check whether any part of the address/len range is forced to change from 'prot'. Suggested-by: Dave Hansen Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 36 +++- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 091f1d3..f630eb4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -367,6 +367,30 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, } /* + * static_protections() "forces" page protections for some address + * ranges. Return true if any part of the address/len range is forced + * to change from 'prot'. + */ +static inline bool +needs_static_protections(pgprot_t prot, unsigned long address, + unsigned long len, unsigned long pfn) +{ + int i; + + address &= PAGE_MASK; + len = PFN_ALIGN(len); + for (i = 0; i < (len >> PAGE_SHIFT); i++, address += PAGE_SIZE, pfn++) { + pgprot_t chk_prot = static_protections(prot, address, pfn); + + if (pgprot_val(chk_prot) != pgprot_val(prot)) + return true; + } + + /* Does static_protections() demand a change ? */ + return false; +} + +/* * Lookup the page table entry for a virtual address in a specific pgd. * Return a pointer to the entry and the level of the mapping. 
*/ @@ -556,7 +580,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot, req_prot; - int i, do_split = 1; + int do_split = 1; enum pg_level level; if (cpa->force_split) @@ -660,14 +684,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: */ - pfn = old_pfn; - for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { - pgprot_t chk_prot = static_protections(req_prot, addr, pfn); - - if (pgprot_val(chk_prot) != pgprot_val(new_prot)) - goto out_unlock; - } - + if (needs_static_protections(new_prot, addr, psize, old_pfn)) + goto out_unlock; /* All checks passed. Just change the large mapping entry */ new_pte = pfn_pte(old_pfn, new_prot); -- 2.7.4
[PATCH v3 2/5] x86/mm: avoid static_protection() checking if not whole large page attr change
The range check whether the address is aligned to the large page and covers the full large page (1G or 2M) is obvious to do _before_ static_protection() check, because if the requested range does not fit and has a different pgprot_val() then it will decide to split after the check anyway. The approach and some of the comments came from Thomas Gleixner's email example for how to do this Suggested-by: Thomas Gleixner Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 35 --- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 68613fd..091f1d3 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -645,11 +645,21 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, } /* +* If the requested address range is not aligned to the start of +* the large page or does not cover the full range, split it up. +* No matter what the static_protections() check below does, it +* would anyway result in a split after doing all the check work +* for nothing. +*/ + addr = address & pmask; + if (address != addr || cpa->numpages != numpages) + goto out_unlock; + + /* * We need to check the full range, whether * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: */ - addr = address & pmask; pfn = old_pfn; for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { pgprot_t chk_prot = static_protections(req_prot, addr, pfn); @@ -659,24 +669,11 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, } - /* -* We need to change the attributes. Check, whether we can -* change the large page in one go. We request a split, when -* the address is not aligned and the number of pages is -* smaller than the number of pages in the large page. Note -* that we limited the number of possible pages already to -* the number of pages in the large page. 
-*/ - if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) { - /* -* The address is aligned and the number of pages -* covers the full page. -*/ - new_pte = pfn_pte(old_pfn, new_prot); - __set_pmd_pte(kpte, address, new_pte); - cpa->flags |= CPA_FLUSHTLB; - do_split = 0; - } + /* All checks passed. Just change the large mapping entry */ + new_pte = pfn_pte(old_pfn, new_prot); + __set_pmd_pte(kpte, address, new_pte); + cpa->flags |= CPA_FLUSHTLB; + do_split = 0; out_unlock: spin_unlock(_lock); -- 2.7.4
[PATCH v3 3/5] x86/mm: add help function to check specific protection flags in range
Introduce the needs_static_protections() helper to check specific protection flags in range. It calls static_protection() to check whether any part of the address/len range is forced to change from 'prot'. Suggested-by: Dave Hansen Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 36 +++- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 091f1d3..f630eb4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -367,6 +367,30 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, } /* + * static_protections() "forces" page protections for some address + * ranges. Return true if any part of the address/len range is forced + * to change from 'prot'. + */ +static inline bool +needs_static_protections(pgprot_t prot, unsigned long address, + unsigned long len, unsigned long pfn) +{ + int i; + + address &= PAGE_MASK; + len = PFN_ALIGN(len); + for (i = 0; i < (len >> PAGE_SHIFT); i++, address += PAGE_SIZE, pfn++) { + pgprot_t chk_prot = static_protections(prot, address, pfn); + + if (pgprot_val(chk_prot) != pgprot_val(prot)) + return true; + } + + /* Does static_protections() demand a change ? */ + return false; +} + +/* * Lookup the page table entry for a virtual address in a specific pgd. * Return a pointer to the entry and the level of the mapping. 
*/ @@ -556,7 +580,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot, req_prot; - int i, do_split = 1; + int do_split = 1; enum pg_level level; if (cpa->force_split) @@ -660,14 +684,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: */ - pfn = old_pfn; - for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { - pgprot_t chk_prot = static_protections(req_prot, addr, pfn); - - if (pgprot_val(chk_prot) != pgprot_val(new_prot)) - goto out_unlock; - } - + if (needs_static_protections(new_prot, addr, psize, old_pfn)) + goto out_unlock; /* All checks passed. Just change the large mapping entry */ new_pte = pfn_pte(old_pfn, new_prot); -- 2.7.4
[PATCH v3 1/5] x86/mm: avoid redundant checking if pgprot has no change
In try_preserve_large_page(), the check for pgprot_val(new_prot) == pgprot_val(old_prot) can definitely be done at first to avoid redundant checking. The approach and some of the comments came from Thomas Gleixner's email example for how to do this. Suggested-by: Thomas Gleixner Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 24 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 8d6c34f..68613fd 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -629,6 +629,22 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, new_prot = static_protections(req_prot, address, pfn); /* +* The static_protections() is used to check specific protection flags +* for certain areas of memory. The old pgprot should be checked already +* when it was applied before. If it's not, then this is a bug in some +* other code and needs to be fixed there. +* +* If new pgprot is same as old pgprot, return directly without any +* additional checking. The following static_protections() checking is +* pointless if pgprot has no change. It can avoid the redundant +* checking and optimize the performance of large page split checking. +*/ + if (pgprot_val(new_prot) == pgprot_val(old_prot)) { + do_split = 0; + goto out_unlock; + } + + /* * We need to check the full range, whether * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: @@ -642,14 +658,6 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, goto out_unlock; } - /* -* If there are no changes, return. maxpages has been updated -* above: -*/ - if (pgprot_val(new_prot) == pgprot_val(old_prot)) { - do_split = 0; - goto out_unlock; - } /* * We need to change the attributes. Check, whether we can -- 2.7.4
[PATCH v3 0/5] x86/mm: fix cpu stuck issue in __change_page_attr_set_clr
One problem is found when optimizing the x86 kernel boot time, that sometimes the free_initmem() will take about 600ms, which is way too much for fast boot. When changing a 4K page attr inside the large page range, __change_page_attr() will call try_preserve_large_page() to decide to split the big page or not. And try_preserve_large_page() will call static_protections() to check all 4K pages inside the large page range one by one. free_initmem() <-- free N pages free_init_pages() set_memory_rw() change_page_attr_set() change_page_attr_set_clr() __change_page_attr_set_clr() __change_page_attr() <-- loop N times try_preserve_large_page() static_protections() <-- worst case: loop (1G/4K = 262144) * N times The problem is, most of the 256K * N times of call of static_protections() is _not_ necessary at all, as pointed out by Thomas Gleixner : "The current code does the static_protection() check loop _before_ checking: 1) Whether the new and the old pgprot are the same 2) Whether the address range which needs to be changed is aligned to and covers the full large mapping It does the static_protections() loop before #1 and #2 which can be optimized." The static_protections() check loop can be optimized to check for overlapping ranges and then check explicitly for those without any looping. Here are 5 patches for these optimizations: Patch 1: check whether new pgprot is same as old pgprot first to avoid unnecessary static_protection() checking. Patch 2: check whether it is whole large page attr change first to avoid unnecessary static_protection() checking. Patch 3: add help function to check specific protection flags in range Patch 4: Optimize the static_protection() by using overlap() check instead of within() Patch 5: Add a check for catching a case where the existing mapping is wrong already The approach and some of the comments came from Thomas's email example for how to do this. Thanks again for Thomas's kind help. 
Thanks, Bin Bin Yang (5): x86/mm: avoid redundant checking if pgprot has no change x86/mm: avoid static_protection() checking if not whole large page attr change x86/mm: add help function to check specific protection flags in range x86/mm: optimize static_protection() by using overlap() x86/mm: add WARN_ON_ONCE() for wrong large page mapping arch/x86/mm/pageattr.c | 127 + 1 file changed, 86 insertions(+), 41 deletions(-) -- 2.7.4
[PATCH v3 5/5] x86/mm: add WARN_ON_ONCE() for wrong large page mapping
If there is a large page which contains an area which requires a different mapping that the one which the large page provides, then something went wrong _before_ this code is called. Here we can catch a case where the existing mapping is wrong already. Inspired-by: Thomas Gleixner Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index fd90c5b..91a250c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -625,6 +625,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, psize = page_level_size(level); pmask = page_level_mask(level); + addr = address & pmask; /* * Calculate the number of pages, which fit into this large @@ -636,6 +637,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, cpa->numpages = numpages; /* +* The old pgprot should not have any protection bit. Otherwise, +* the existing mapping is wrong already. +*/ + WARN_ON_ONCE(needs_static_protections(old_prot, addr, psize, old_pfn)); + + /* * We are safe now. Check whether the new pgprot is the same: * Convert protection attributes to 4k-format, as cpa->mask* are set * up accordingly. @@ -690,7 +697,6 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * would anyway result in a split after doing all the check work * for nothing. */ - addr = address & pmask; if (address != addr || cpa->numpages != numpages) goto out_unlock; -- 2.7.4
[PATCH v3 1/5] x86/mm: avoid redundant checking if pgprot has no change
In try_preserve_large_page(), the check for pgprot_val(new_prot) == pgprot_val(old_prot) can definitely be done at first to avoid redundant checking. The approach and some of the comments came from Thomas Gleixner's email example for how to do this. Suggested-by: Thomas Gleixner Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 24 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 8d6c34f..68613fd 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -629,6 +629,22 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, new_prot = static_protections(req_prot, address, pfn); /* +* The static_protections() is used to check specific protection flags +* for certain areas of memory. The old pgprot should be checked already +* when it was applied before. If it's not, then this is a bug in some +* other code and needs to be fixed there. +* +* If new pgprot is same as old pgprot, return directly without any +* additional checking. The following static_protections() checking is +* pointless if pgprot has no change. It can avoid the redundant +* checking and optimize the performance of large page split checking. +*/ + if (pgprot_val(new_prot) == pgprot_val(old_prot)) { + do_split = 0; + goto out_unlock; + } + + /* * We need to check the full range, whether * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: @@ -642,14 +658,6 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, goto out_unlock; } - /* -* If there are no changes, return. maxpages has been updated -* above: -*/ - if (pgprot_val(new_prot) == pgprot_val(old_prot)) { - do_split = 0; - goto out_unlock; - } /* * We need to change the attributes. Check, whether we can -- 2.7.4
[PATCH v3 0/5] x86/mm: fix cpu stuck issue in __change_page_attr_set_clr
One problem is found when optimizing the x86 kernel boot time, that sometimes the free_initmem() will take about 600ms, which is way too much for fast boot. When changing a 4K page attr inside the large page range, __change_page_attr() will call try_preserve_large_page() to decide to split the big page or not. And try_preserve_large_page() will call static_protections() to check all 4K pages inside the large page range one by one. free_initmem() <-- free N pages free_init_pages() set_memory_rw() change_page_attr_set() change_page_attr_set_clr() __change_page_attr_set_clr() __change_page_attr() <-- loop N times try_preserve_large_page() static_protections() <-- worst case: loop (1G/4K = 262144) * N times The problem is, most of the 256K * N times of call of static_protections() is _not_ necessary at all, as pointed out by Thomas Gleixner : "The current code does the static_protection() check loop _before_ checking: 1) Whether the new and the old pgprot are the same 2) Whether the address range which needs to be changed is aligned to and covers the full large mapping It does the static_protections() loop before #1 and #2 which can be optimized." The static_protections() check loop can be optimized to check for overlapping ranges and then check explicitly for those without any looping. Here are 5 patches for these optimizations: Patch 1: check whether new pgprot is same as old pgprot first to avoid unnecessary static_protection() checking. Patch 2: check whether it is whole large page attr change first to avoid unnecessary static_protection() checking. Patch 3: add help function to check specific protection flags in range Patch 4: Optimize the static_protection() by using overlap() check instead of within() Patch 5: Add a check for catching a case where the existing mapping is wrong already The approach and some of the comments came from Thomas's email example for how to do this. Thanks again for Thomas's kind help. 
Thanks, Bin Bin Yang (5): x86/mm: avoid redundant checking if pgprot has no change x86/mm: avoid static_protection() checking if not whole large page attr change x86/mm: add help function to check specific protection flags in range x86/mm: optimize static_protection() by using overlap() x86/mm: add WARN_ON_ONCE() for wrong large page mapping arch/x86/mm/pageattr.c | 127 + 1 file changed, 86 insertions(+), 41 deletions(-) -- 2.7.4
[PATCH v3 5/5] x86/mm: add WARN_ON_ONCE() for wrong large page mapping
If there is a large page which contains an area which requires a different mapping than the one which the large page provides, then something went wrong _before_ this code is called. Here we can catch a case where the existing mapping is wrong already. Inspired-by: Thomas Gleixner Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index fd90c5b..91a250c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -625,6 +625,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, psize = page_level_size(level); pmask = page_level_mask(level); + addr = address & pmask; /* * Calculate the number of pages, which fit into this large @@ -636,6 +637,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, cpa->numpages = numpages; /* +* The old pgprot should not have any protection bit. Otherwise, +* the existing mapping is wrong already. +*/ + WARN_ON_ONCE(needs_static_protections(old_prot, addr, psize, old_pfn)); + + /* * We are safe now. Check whether the new pgprot is the same: * Convert protection attributes to 4k-format, as cpa->mask* are set * up accordingly. @@ -690,7 +697,6 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * would anyway result in a split after doing all the check work * for nothing. */ - addr = address & pmask; if (address != addr || cpa->numpages != numpages) goto out_unlock; -- 2.7.4
[PATCH v2] x86/mm: fix cpu stuck issue in __change_page_attr_set_clr
When changing a 4K page attr inside 1G/2M large page range, __change_page_attr() will call try_preserve_large_page() to decide to split the big page or not. And try_preserve_large_page() will call static_protections() to check all 4K pages inside the large page range one by one. The check loop is very inefficient. In worst case, static_protections() will be called for 1G/4K (262144) times. This issue can be triggered by free_initmem() during kernel boot. free_initmem() <-- free N pages free_init_pages() set_memory_rw() change_page_attr_set() change_page_attr_set_clr() __change_page_attr_set_clr() __change_page_attr() <-- loop N times try_preserve_large_page() static_protections() <-- worst case: loop 262144 * N times Instead of checking all pages, it can only check one page for same overlapping range. This patch enhances static_protections() to return the page num that all following pages are in the same overlapping range with same protection flags. It can reduce the check loop from 262144 to less than 10 times. Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 72 -- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3bded76e..dee23ef 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -292,17 +292,27 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, * checks and fixes these known static required protection bits. */ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, - unsigned long pfn) + unsigned long pfn, unsigned long *page_num) { pgprot_t forbidden = __pgprot(0); + unsigned long tmp; + unsigned long num = PUD_PAGE_SIZE >> PAGE_SHIFT; /* * The BIOS area between 640k and 1Mb needs to be executable for * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. 
*/ #ifdef CONFIG_PCI_BIOS - if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) - pgprot_val(forbidden) |= _PAGE_NX; + if (pcibios_enabled) { + tmp = (BIOS_BEGIN >> PAGE_SHIFT) > pfn ? + (BIOS_BEGIN >> PAGE_SHIFT) - pfn : ULONG_MAX; + if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, + BIOS_END >> PAGE_SHIFT)) { + pgprot_val(forbidden) |= _PAGE_NX; + tmp = (BIOS_END >> PAGE_SHIFT) - pfn; + } + num = num > tmp ? tmp : num; + } #endif /* @@ -310,18 +320,30 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * Does not cover __inittext since that is gone later on. On * 64bit we do not enforce !NX on the low mapping */ - if (within(address, (unsigned long)_text, (unsigned long)_etext)) + tmp = (unsigned long)_text > address ? + ((unsigned long)_text - address) >> PAGE_SHIFT : ULONG_MAX; + if (within(address, (unsigned long)_text, (unsigned long)_etext)) { pgprot_val(forbidden) |= _PAGE_NX; + tmp = ((unsigned long)_etext - address) >> PAGE_SHIFT; + } + num = num > tmp ? tmp : num; /* * The .rodata section needs to be read-only. Using the pfn * catches all aliases. This also includes __ro_after_init, * so do not enforce until kernel_set_to_readonly is true. */ - if (kernel_set_to_readonly && - within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, - __pa_symbol(__end_rodata) >> PAGE_SHIFT)) - pgprot_val(forbidden) |= _PAGE_RW; + if (kernel_set_to_readonly) { + tmp = (__pa_symbol(__start_rodata) >> PAGE_SHIFT) > pfn ? + (__pa_symbol(__start_rodata) >> PAGE_SHIFT) - pfn : + ULONG_MAX; + if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, + __pa_symbol(__end_rodata) >> PAGE_SHIFT)) { + pgprot_val(forbidden) |= _PAGE_RW; + tmp = (__pa_symbol(__end_rodata) >> PAGE_SHIFT) - pfn; + } + num = num > tmp ? 
tmp : num; + } #if defined(CONFIG_X86_64) /* @@ -333,11 +355,15 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * This will preserve the large page mappings for kernel text/data * at no extra cost. */ + tmp = kernel_set_to_readonly && (unsigned long)_text > address ? + ((unsigned long)_text - a
[PATCH v2] x86/mm: fix cpu stuck issue in __change_page_attr_set_clr
When changing a 4K page attr inside 1G/2M large page range, __change_page_attr() will call try_preserve_large_page() to decide to split the big page or not. And try_preserve_large_page() will call static_protections() to check all 4K pages inside the large page range one by one. The check loop is very inefficient. In worst case, static_protections() will be called for 1G/4K (262144) times. This issue can be triggered by free_initmem() during kernel boot. free_initmem() <-- free N pages free_init_pages() set_memory_rw() change_page_attr_set() change_page_attr_set_clr() __change_page_attr_set_clr() __change_page_attr() <-- loop N times try_preserve_large_page() static_protections() <-- worst case: loop 262144 * N times Instead of checking all pages, it can only check one page for same overlapping range. This patch enhances static_protections() to return the page num that all following pages are in the same overlapping range with same protection flags. It can reduce the check loop from 262144 to less than 10 times. Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 72 -- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3bded76e..dee23ef 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -292,17 +292,27 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, * checks and fixes these known static required protection bits. */ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, - unsigned long pfn) + unsigned long pfn, unsigned long *page_num) { pgprot_t forbidden = __pgprot(0); + unsigned long tmp; + unsigned long num = PUD_PAGE_SIZE >> PAGE_SHIFT; /* * The BIOS area between 640k and 1Mb needs to be executable for * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. 
*/ #ifdef CONFIG_PCI_BIOS - if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) - pgprot_val(forbidden) |= _PAGE_NX; + if (pcibios_enabled) { + tmp = (BIOS_BEGIN >> PAGE_SHIFT) > pfn ? + (BIOS_BEGIN >> PAGE_SHIFT) - pfn : ULONG_MAX; + if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, + BIOS_END >> PAGE_SHIFT)) { + pgprot_val(forbidden) |= _PAGE_NX; + tmp = (BIOS_END >> PAGE_SHIFT) - pfn; + } + num = num > tmp ? tmp : num; + } #endif /* @@ -310,18 +320,30 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * Does not cover __inittext since that is gone later on. On * 64bit we do not enforce !NX on the low mapping */ - if (within(address, (unsigned long)_text, (unsigned long)_etext)) + tmp = (unsigned long)_text > address ? + ((unsigned long)_text - address) >> PAGE_SHIFT : ULONG_MAX; + if (within(address, (unsigned long)_text, (unsigned long)_etext)) { pgprot_val(forbidden) |= _PAGE_NX; + tmp = ((unsigned long)_etext - address) >> PAGE_SHIFT; + } + num = num > tmp ? tmp : num; /* * The .rodata section needs to be read-only. Using the pfn * catches all aliases. This also includes __ro_after_init, * so do not enforce until kernel_set_to_readonly is true. */ - if (kernel_set_to_readonly && - within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, - __pa_symbol(__end_rodata) >> PAGE_SHIFT)) - pgprot_val(forbidden) |= _PAGE_RW; + if (kernel_set_to_readonly) { + tmp = (__pa_symbol(__start_rodata) >> PAGE_SHIFT) > pfn ? + (__pa_symbol(__start_rodata) >> PAGE_SHIFT) - pfn : + ULONG_MAX; + if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, + __pa_symbol(__end_rodata) >> PAGE_SHIFT)) { + pgprot_val(forbidden) |= _PAGE_RW; + tmp = (__pa_symbol(__end_rodata) >> PAGE_SHIFT) - pfn; + } + num = num > tmp ? 
tmp : num; + } #if defined(CONFIG_X86_64) /* @@ -333,11 +355,15 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * This will preserve the large page mappings for kernel text/data * at no extra cost. */ + tmp = kernel_set_to_readonly && (unsigned long)_text > address ? + ((unsigned long)_text - a
[PATCH] x86/mm: fix cpu stuck issue in __change_page_attr_set_clr
This issue can be easily triggered by free_initmem() function on x86_64 cpu. When changing page attr, __change_page_attr_set_clr will call __change_page_attr for every 4K page. And try_preserve_large_page will be called to check whether it needs to split the large page. If cpu supports "pdpe1gb", kernel will try to use 1G large page. In worst case, it needs to check every 4K page in 1G range in try_preserve_large_page function. If __change_page_attr_set_clr needs to change lots of 4K pages, cpu will be stuck for long time. This patch tries to cache the last address which had been checked just now. If the next address is in same big page, the cache will be used without full range check. Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 26 ++ 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3bded76e..b9241ac 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -552,16 +552,20 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, struct cpa_data *cpa) { + static unsigned long address_cache; + static unsigned long do_split_cache = 1; unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot, req_prot; int i, do_split = 1; enum pg_level level; - if (cpa->force_split) + spin_lock(&pgd_lock); + if (cpa->force_split) { + do_split_cache = 1; return 1; + } - spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up already: @@ -627,13 +631,25 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, new_prot = static_protections(req_prot, address, pfn); + addr = address & pmask; + pfn = old_pfn; + /* +* If an address in same range had been checked just now, re-use the +* cache value without full range check. In the worst case, it needs to +* check every 4K page in 1G range, which causes cpu stuck for long +* time. 
+*/ + if (!do_split_cache && + address_cache >= addr && address_cache < nextpage_addr && + pgprot_val(new_prot) == pgprot_val(old_prot)) { + do_split = do_split_cache; + goto out_unlock; + } /* * We need to check the full range, whether * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: */ - addr = address & pmask; - pfn = old_pfn; for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { pgprot_t chk_prot = static_protections(req_prot, addr, pfn); @@ -670,6 +686,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, } out_unlock: + address_cache = address; + do_split_cache = do_split; spin_unlock(&pgd_lock); return do_split; -- 2.7.4
[PATCH] x86/mm: fix cpu stuck issue in __change_page_attr_set_clr
This issue can be easily triggered by free_initmem() function on x86_64 cpu. When changing page attr, __change_page_attr_set_clr will call __change_page_attr for every 4K page. And try_preserve_large_page will be called to check whether it needs to split the large page. If cpu supports "pdpe1gb", kernel will try to use 1G large page. In worst case, it needs to check every 4K page in 1G range in try_preserve_large_page function. If __change_page_attr_set_clr needs to change lots of 4K pages, cpu will be stuck for long time. This patch tries to cache the last address which had been checked just now. If the next address is in same big page, the cache will be used without full range check. Signed-off-by: Bin Yang --- arch/x86/mm/pageattr.c | 26 ++ 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3bded76e..b9241ac 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -552,16 +552,20 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, struct cpa_data *cpa) { + static unsigned long address_cache; + static unsigned long do_split_cache = 1; unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot, req_prot; int i, do_split = 1; enum pg_level level; - if (cpa->force_split) + spin_lock(&pgd_lock); + if (cpa->force_split) { + do_split_cache = 1; return 1; + } - spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up already: @@ -627,13 +631,25 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, new_prot = static_protections(req_prot, address, pfn); + addr = address & pmask; + pfn = old_pfn; + /* +* If an address in same range had been checked just now, re-use the +* cache value without full range check. In the worst case, it needs to +* check every 4K page in 1G range, which causes cpu stuck for long +* time. 
+*/ + if (!do_split_cache && + address_cache >= addr && address_cache < nextpage_addr && + pgprot_val(new_prot) == pgprot_val(old_prot)) { + do_split = do_split_cache; + goto out_unlock; + } /* * We need to check the full range, whether * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: */ - addr = address & pmask; - pfn = old_pfn; for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { pgprot_t chk_prot = static_protections(req_prot, addr, pfn); @@ -670,6 +686,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, } out_unlock: + address_cache = address; + do_split_cache = do_split; spin_unlock(&pgd_lock); return do_split; -- 2.7.4