[PATCH] powerpc/book3s: Remove a few page table update interfaces.
When updating page tables, we need to make sure we fill the page table entry valid bit. We should be using page table populate interface for updating the table entries. The page table 'set' interface allows updating the raw value of page table entry. This can result in updating the entry wrongly. Remove the 'set' interface so that we avoid its future usage. Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 8 arch/powerpc/include/asm/book3s/64/pgtable.h | 14 -- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 9c1173283b96..138bc2ecc0c4 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -111,7 +111,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { - pgd_set(pgd, __pgtable_ptr_val(pud) | PGD_VAL_BITS); + *pgd = __pgd(__pgtable_ptr_val(pud) | PGD_VAL_BITS); } static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) @@ -138,7 +138,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_set(pud, __pgtable_ptr_val(pmd) | PUD_VAL_BITS); + *pud = __pud(__pgtable_ptr_val(pmd) | PUD_VAL_BITS); } static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, @@ -176,13 +176,13 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { - pmd_set(pmd, __pgtable_ptr_val(pte) | PMD_VAL_BITS); + *pmd = __pmd(__pgtable_ptr_val(pte) | PMD_VAL_BITS); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - pmd_set(pmd, __pgtable_ptr_val(pte_page) | PMD_VAL_BITS); + *pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS); } static 
inline pgtable_t pmd_pgtable(pmd_t pmd) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index dc71e2b92003..a24e00fb7fa7 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -853,11 +853,6 @@ static inline bool pte_ci(pte_t pte) return false; } -static inline void pmd_set(pmd_t *pmdp, unsigned long val) -{ - *pmdp = __pmd(val); -} - static inline void pmd_clear(pmd_t *pmdp) { *pmdp = __pmd(0); @@ -889,11 +884,6 @@ static inline int pmd_bad(pmd_t pmd) return hash__pmd_bad(pmd); } -static inline void pud_set(pud_t *pudp, unsigned long val) -{ - *pudp = __pud(val); -} - static inline void pud_clear(pud_t *pudp) { *pudp = __pud(0); @@ -936,10 +926,6 @@ static inline bool pud_access_permitted(pud_t pud, bool write) } #define pgd_write(pgd) pte_write(pgd_pte(pgd)) -static inline void pgd_set(pgd_t *pgdp, unsigned long val) -{ - *pgdp = __pgd(val); -} static inline void pgd_clear(pgd_t *pgdp) { -- 2.20.1
[PATCH] powerpc/64s: Fix possible corruption on big endian due to pgd/pud_present()
In v4.20 we changed our pgd/pud_present() to check for _PAGE_PRESENT rather than just checking that the value is non-zero, e.g.: static inline int pgd_present(pgd_t pgd) { - return !pgd_none(pgd); + return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); } Unfortunately this is broken on big endian, as the result of the bitwise && is truncated to int, which is always zero because _PAGE_PRESENT is 0x8000ul. This means pgd_present() and pud_present() are always false at compile time, and the compiler elides the subsequent code. Remarkably with that bug present we are still able to boot and run with few noticeable effects. However under some work loads we are able to trigger a warning in the ext4 code: WARNING: CPU: 11 PID: 29593 at fs/ext4/inode.c:3927 .ext4_set_page_dirty+0x70/0xb0 CPU: 11 PID: 29593 Comm: debugedit Not tainted 4.20.0-rc1 #1 ... NIP .ext4_set_page_dirty+0x70/0xb0 LR .set_page_dirty+0xa0/0x150 Call Trace: .set_page_dirty+0xa0/0x150 .unmap_page_range+0xbf0/0xe10 .unmap_vmas+0x84/0x130 .unmap_region+0xe8/0x190 .__do_munmap+0x2f0/0x510 .__vm_munmap+0x80/0x110 .__se_sys_munmap+0x14/0x30 system_call+0x5c/0x70 The fix is simple, we need to convert the result of the bitwise && to an int before returning it. Thanks to Jan Kara and Aneesh for help with debugging. Fixes: da7ad366b497 ("powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit") Cc: sta...@vger.kernel.org # v4.20+ Reported-by: Erhard F. 
Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index c9bfe526ca9d..d8c8d7c9df15 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -904,7 +904,7 @@ static inline int pud_none(pud_t pud) static inline int pud_present(pud_t pud) { - return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT)); + return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT)); } extern struct page *pud_page(pud_t pud); @@ -951,7 +951,7 @@ static inline int pgd_none(pgd_t pgd) static inline int pgd_present(pgd_t pgd) { - return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); + return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); } static inline pte_t pgd_pte(pgd_t pgd) -- 2.20.1
[PATCH 1/1] powerpc/64: Adjust order in pcibios_init()
The pcibios_init() function for 64 bit PowerPC currently calls pci_bus_add_devices() before pcibios_resource_survey(), which seems incorrect because it adds devices and attempts to bind their drivers before allocating their resources (although no problems seem to be apparent). So move the call to pci_bus_add_devices() to after pcibios_resource_survey(). This will also allow the ppc_md.pcibios_bus_add_device() hooks to perform actions that depend on PCI resources, both during rescanning (where this is already the case) and at boot time, which should support improvements and refactoring. Signed-off-by: Sam Bobroff --- Hi everyone, I've tested this on a P9 for both the host and a KVM guest, and the change hasn't caused any differences in PCI resource assignments or the general boot messages. I've also had a go at inspecting most of the code used by pci_bus_add_devices() and pcibios_resource_survey() and it doesn't look like there are going to be any changes in behaviour caused by reordering. It might be worth mentioning that the hotplug path (see pcibios_finish_adding_to_bus()) already does resource allocation before calling pci_bus_add_devices(). However, it would be great if someone could test this change on some older hardware or comment on wether we should make the same change on 32 bit machines. Cheers, Sam. arch/powerpc/kernel/pci_64.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 9d8c10d55407..1ce2dbdb 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -58,14 +58,16 @@ static int __init pcibios_init(void) pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0); /* Scan all of the recorded PCI controllers. 
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) pcibios_scan_phb(hose); - pci_bus_add_devices(hose->bus); - } /* Call common code to handle resource allocation */ pcibios_resource_survey(); + /* Add devices. */ + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) + pci_bus_add_devices(hose->bus);
Re: [PATCH v3 2/2] drivers/mtd: Fix device registration error
On 2/13/19 6:58 PM, Boris Brezillon wrote: Subject prefix should be "mtd: powernv_flash: " On Mon, 11 Feb 2019 19:03:38 +0530 "Aneesh Kumar K.V" wrote: This change helps me to get multiple mtd device registered. Without this I get sysfs: cannot create duplicate filename '/bus/nvmem/devices/flash0' CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.0.0-rc2-00557-g1ef20ef21f22 #13 Call Trace: [c000b38e3220] [c0b58fe4] dump_stack+0xe8/0x164 (unreliable) [c000b38e3270] [c04cf074] sysfs_warn_dup+0x84/0xb0 [c000b38e32f0] [c04cf6c4] sysfs_do_create_link_sd.isra.0+0x114/0x150 [c000b38e3340] [c0726a84] bus_add_device+0x94/0x1e0 [c000b38e33c0] [c07218f0] device_add+0x4d0/0x830 [c000b38e3480] [c09d54a8] nvmem_register.part.2+0x1c8/0xb30 [c000b38e3560] [c0834530] mtd_nvmem_add+0x90/0x120 [c000b38e3650] [c0835bc8] add_mtd_device+0x198/0x4e0 [c000b38e36f0] [c083619c] mtd_device_parse_register+0x11c/0x280 [c000b38e3780] [c0840830] powernv_flash_probe+0x180/0x250 [c000b38e3820] [c072c120] platform_drv_probe+0x60/0xf0 [c000b38e38a0] [c07283c8] really_probe+0x138/0x4d0 [c000b38e3930] [c0728acc] driver_probe_device+0x13c/0x1b0 [c000b38e39b0] [c0728c7c] __driver_attach+0x13c/0x1c0 [c000b38e3a30] [c0725130] bus_for_each_dev+0xa0/0x120 [c000b38e3a90] [c0727b2c] driver_attach+0x2c/0x40 [c000b38e3ab0] [c07270f8] bus_add_driver+0x228/0x360 [c000b38e3b40] [c072a2e0] driver_register+0x90/0x1a0 [c000b38e3bb0] [c072c020] __platform_driver_register+0x50/0x70 [c000b38e3bd0] [c105c984] powernv_flash_driver_init+0x24/0x38 [c000b38e3bf0] [c0010904] do_one_initcall+0x84/0x464 [c000b38e3cd0] [c1004548] kernel_init_freeable+0x530/0x634 [c000b38e3db0] [c0011154] kernel_init+0x1c/0x168 [c000b38e3e20] [c000bed4] ret_from_kernel_thread+0x5c/0x68 mtd mtd1: Failed to register NVMEM device With the change we now have root@(none):/sys/bus/nvmem/devices# ls -al total 0 drwxr-xr-x 2 root root 0 Feb 6 20:49 . drwxr-xr-x 4 root root 0 Feb 6 20:49 .. 
lrwxrwxrwx 1 root root 0 Feb 6 20:49 flash@0 -> ../../../devices/platform/ibm,opal:flash@0/mtd/mtd0/flash@0 lrwxrwxrwx 1 root root 0 Feb 6 20:49 flash@1 -> ../../../devices/platform/ibm,opal:flash@1/mtd/mtd1/flash@1 Fixes: acfe63ec1c59 ("mtd: Convert to using %pOFn instead of device_node.name") Actually it's not this commit that is at fault as mtd->name was already given the value of device_node->name before that. I think you're actually fixing 1cbb4a1c433a ("mtd: powernv: Add powernv flash MTD abstraction driver"). No need to send a new version, I can fix that when applying, just let me know if you're okay with the changes I suggested. The suggested changes looks good. Thanks -aneesh
Re: [PATCH 0/5] use pinned_vm instead of locked_vm to account pinned pages
On Mon, Feb 11, 2019 at 03:54:47PM -0700, Jason Gunthorpe wrote: > On Mon, Feb 11, 2019 at 05:44:32PM -0500, Daniel Jordan wrote: > > > All five of these places, and probably some of Davidlohr's conversions, > > probably want to be collapsed into a common helper in the core mm for > > accounting pinned pages. I tried, and there are several details that > > likely need discussion, so this can be done as a follow-on. > > I've wondered the same.. I'm really thinking this would be a nice way to ensure it gets cleaned up and does not happen again. Also, by moving it to the core we could better manage any user visible changes. >From a high level, pinned is a subset of locked so it seems like we need a 2 sets of helpers. try_increment_locked_vm(...) decrement_locked_vm(...) try_increment_pinned_vm(...) decrement_pinned_vm(...) Where try_increment_pinned_vm() also increments locked_vm... Of course this may end up reverting the improvement of Davidlohr Bueso's atomic work... :-( Furthermore it would seem better (although I don't know if at all possible) if this were accounted for in core calls which tracked them based on how the pages are being used so that drivers can't call try_increment_locked_vm() and then pin the pages... Thus getting the account wrong vs what actually happened. And then in the end we can go back to locked_vm being the value checked against RLIMIT_MEMLOCK. Ira
Re: [PATCH 1/5] vfio/type1: use pinned_vm instead of locked_vm to account pinned pages
On Wed, Feb 13, 2019 at 01:03:30PM -0700, Alex Williamson wrote: > Daniel Jordan wrote: > > On Tue, Feb 12, 2019 at 11:41:10AM -0700, Alex Williamson wrote: > > > This still makes me nervous because we have userspace dependencies on > > > setting process locked memory. > > > > Could you please expand on this? Trying to get more context. > > VFIO is a userspace driver interface and the pinned/locked page > accounting we're doing here is trying to prevent a user from exceeding > their locked memory limits. Thus a VM management tool or unprivileged > userspace driver needs to have appropriate locked memory limits > configured for their use case. Currently we do not have a unified > accounting scheme, so if a page is mlock'd by the user and also mapped > through VFIO for DMA, it's accounted twice, these both increment > locked_vm and userspace needs to manage that. If pinned memory > and locked memory are now two separate buckets and we're only comparing > one of them against the locked memory limit, then it seems we have > effectively doubled the user's locked memory for this use case, as > Jason questioned. The user could mlock one page and DMA map another, > they're both "locked", but now they only take one slot in each bucket. Right, yes. Should have been more specific. I was after a concrete use case where this would happen (sounded like you may have had a specific tool in mind). But it doesn't matter. I understand your concern and agree that, given the possibility that accounting in _some_ tool can be affected, we should fix accounting before changing user visible behavior. I can start a separate discussion, having opened the can of worms again :) > If we continue forward with using a separate bucket here, userspace > could infer that accounting is unified and lower the user's locked > memory limit, or exploit the gap that their effective limit might > actually exceed system memory. 
In the former case, if we do eventually > correct to compare the total of the combined buckets against the user's > locked memory limits, we'll break users that have adapted their locked > memory limits to meet the apparent needs. In the latter case, the > inconsistent accounting is potentially an attack vector. Makes sense. > > > There's a user visible difference if we > > > account for them in the same bucket vs separate. Perhaps we're > > > counting in the wrong bucket now, but if we "fix" that and userspace > > > adapts, how do we ever go back to accounting both mlocked and pinned > > > memory combined against rlimit? Thanks, > > > > PeterZ posted an RFC that addresses this point[1]. It kept pinned_vm and > > locked_vm accounting separate, but allowed the two to be added safely to be > > compared against RLIMIT_MEMLOCK. > > Unless I'm incorrect in the concerns above, I don't see how we can > convert vfio before this occurs. > > > Anyway, until some solution is agreed on, are there objections to converting > > locked_vm to an atomic, to avoid user-visible changes, instead of switching > > locked_vm users to pinned_vm? > > Seems that as long as we have separate buckets that are compared > individually to rlimit that we've got problems, it's just a matter of > where they're exposed based on which bucket is used for which > interface. Thanks, Indeed. But for now, any concern with simply changing the type of the currently used counter to an atomic, to reduce mmap_sem usage? This is just an implementation detail, invisible to userspace.
Re: [PATCH V2 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool'
On Wed, Feb 13, 2019 at 04:11:10PM -0700, Jason Gunthorpe wrote: > On Wed, Feb 13, 2019 at 03:04:51PM -0800, ira.we...@intel.com wrote: > > From: Ira Weiny > > > > To facilitate additional options to get_user_pages_fast() change the > > singular write parameter to be gup_flags. > > So now we have: > > long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, > struct page **pages, unsigned int gup_flags); > > and > > int get_user_pages_fast(unsigned long start, int nr_pages, > unsigned int gup_flags, struct page **pages) > > Does this make any sense? At least the arguments should be in the same > order, I think. Yes... and no. see below. > > Also this comment: > /* > * get_user_pages_unlocked() is suitable to replace the form: > * > * down_read(>mmap_sem); > * get_user_pages(tsk, mm, ..., pages, NULL); > * up_read(>mmap_sem); > * > * with: > * > * get_user_pages_unlocked(tsk, mm, ..., pages); > * > * It is functionally equivalent to get_user_pages_fast so > * get_user_pages_fast should be used instead if specific gup_flags > * (e.g. FOLL_FORCE) are not required. > */ > > Needs some attention as the recommendation is now nonsense. IMO they are not functionally equivalent. We can't remove *_unlocked() as it is used as both a helper for the arch specific *_fast() calls, _and_ in drivers. Again I don't know the history here but it could be that the drivers should never have used the call in the first place??? Or been converted at some point? I could change the comment to be something like /* * get_user_pages_unlocked() is only to be used by arch specific * get_user_pages_fast() calls. Drivers should be calling * get_user_pages_fast() */ Instead of the current comment. And change the drivers to get_user_pages_fast(). However, I'm not sure if these drivers need the FOLL_TOUCH flag which *_unlocked() adds for them. And adding FOLL_TOUCH to *_fast() is not going to give the same functionality. 
It _looks_ like we can add FOLL_TOUCH functionality to the fast path in the generic code. I'm not sure about the arch's. If we did that then we can have those drivers use FOLL_TOUCH or not in *_fast() if they want/need. > > Honestly a proper explanation of why two functions exist would be > great at this point :) I've not researched it. I do agree that there seems to be a lot of calls in this file and the differences are subtle. Ira > > Jason
Re: [PATCH V2 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool'
On Wed, Feb 13, 2019 at 03:04:51PM -0800, ira.we...@intel.com wrote: > From: Ira Weiny > > To facilitate additional options to get_user_pages_fast() change the > singular write parameter to be gup_flags. So now we have: long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); and int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) Does this make any sense? At least the arguments should be in the same order, I think. Also this comment: /* * get_user_pages_unlocked() is suitable to replace the form: * * down_read(>mmap_sem); * get_user_pages(tsk, mm, ..., pages, NULL); * up_read(>mmap_sem); * * with: * * get_user_pages_unlocked(tsk, mm, ..., pages); * * It is functionally equivalent to get_user_pages_fast so * get_user_pages_fast should be used instead if specific gup_flags * (e.g. FOLL_FORCE) are not required. */ Needs some attention as the recommendation is now nonsense. Honestly a proper explanation of why two functions exist would be great at this point :) Jason
[PATCH V2 7/7] IB/mthca: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
From: Ira Weiny Use the new FOLL_LONGTERM to get_user_pages_fast() to protect against FS DAX pages being mapped. Signed-off-by: Ira Weiny --- drivers/infiniband/hw/mthca/mthca_memfree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 112d2f38e0de..8ff0e90d7564 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -472,7 +472,8 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, goto out; } - ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages); + ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, + FOLL_WRITE | FOLL_LONGTERM, pages); if (ret < 0) goto out; -- 2.20.1
[PATCH V2 6/7] IB/qib: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
From: Ira Weiny Use the new FOLL_LONGTERM to get_user_pages_fast() to protect against FS DAX pages being mapped. Signed-off-by: Ira Weiny --- drivers/infiniband/hw/qib/qib_user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 31c523b2a9f5..b53cc0240e02 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -673,7 +673,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, else j = npages; - ret = get_user_pages_fast(addr, j, 0, pages); + ret = get_user_pages_fast(addr, j, FOLL_LONGTERM, pages); if (ret != j) { i = 0; j = ret; -- 2.20.1
[PATCH V2 5/7] IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
From: Ira Weiny Use the new FOLL_LONGTERM to get_user_pages_fast() to protect against FS DAX pages being mapped. Signed-off-by: Ira Weiny --- drivers/infiniband/hw/hfi1/user_pages.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 78ccacaf97d0..6a7f9cd5a94e 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -104,9 +104,11 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np bool writable, struct page **pages) { int ret; + unsigned int gup_flags = writable ? FOLL_WRITE : 0; - ret = get_user_pages_fast(vaddr, npages, writable ? FOLL_WRITE : 0, - pages); + gup_flags |= FOLL_LONGTERM; + + ret = get_user_pages_fast(vaddr, npages, gup_flags, pages); if (ret < 0) return ret; -- 2.20.1
[PATCH V2 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast
From: Ira Weiny DAX pages were previously unprotected from longterm pins when users called get_user_pages_fast(). Use the new FOLL_LONGTERM flag to check for DEVMAP pages and fall back to regular GUP processing if a DEVMAP page is encountered. Signed-off-by: Ira Weiny --- mm/gup.c | 24 +--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 6f32d36b3c5b..f7e759c523bb 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1439,6 +1439,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, goto pte_unmap; if (pte_devmap(pte)) { + if (unlikely(flags & FOLL_LONGTERM)) + goto pte_unmap; + pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, pages); @@ -1578,8 +1581,11 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) return 0; - if (pmd_devmap(orig)) + if (pmd_devmap(orig)) { + if (unlikely(flags & FOLL_LONGTERM)) + return 0; return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr); + } refs = 0; page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); @@ -1904,8 +1910,20 @@ int get_user_pages_fast(unsigned long start, int nr_pages, start += nr << PAGE_SHIFT; pages += nr; - ret = get_user_pages_unlocked(start, nr_pages - nr, pages, - gup_flags); + if (gup_flags & FOLL_LONGTERM) { + down_read(>mm->mmap_sem); + ret = __gup_longterm_locked(current, current->mm, + start, nr_pages - nr, + pages, NULL, gup_flags); + up_read(>mm->mmap_sem); + } else { + /* +* retain FAULT_FOLL_ALLOW_RETRY optimization if +* possible +*/ + ret = get_user_pages_unlocked(start, nr_pages - nr, + pages, gup_flags); + } /* Have to be a bit careful with return values */ if (nr > 0) { -- 2.20.1
[PATCH V2 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool'
From: Ira Weiny To facilitate additional options to get_user_pages_fast() change the singular write parameter to be gup_flags. This patch does not change any functionality. New functionality will follow in subsequent patches. Some of the get_user_pages_fast() call sites were unchanged because they already passed FOLL_WRITE or 0 for the write parameter. Signed-off-by: Ira Weiny --- arch/mips/mm/gup.c | 11 ++- arch/powerpc/kvm/book3s_64_mmu_hv.c| 4 ++-- arch/powerpc/kvm/e500_mmu.c| 2 +- arch/powerpc/mm/mmu_context_iommu.c| 4 ++-- arch/s390/kvm/interrupt.c | 2 +- arch/s390/mm/gup.c | 12 ++-- arch/sh/mm/gup.c | 11 ++- arch/sparc/mm/gup.c| 9 + arch/x86/kvm/paging_tmpl.h | 2 +- arch/x86/kvm/svm.c | 2 +- drivers/fpga/dfl-afu-dma-region.c | 2 +- drivers/gpu/drm/via/via_dmablit.c | 3 ++- drivers/infiniband/hw/hfi1/user_pages.c| 3 ++- drivers/misc/genwqe/card_utils.c | 2 +- drivers/misc/vmw_vmci/vmci_host.c | 2 +- drivers/misc/vmw_vmci/vmci_queue_pair.c| 6 -- drivers/platform/goldfish/goldfish_pipe.c | 3 ++- drivers/rapidio/devices/rio_mport_cdev.c | 4 +++- drivers/sbus/char/oradax.c | 2 +- drivers/scsi/st.c | 3 ++- drivers/staging/gasket/gasket_page_table.c | 4 ++-- drivers/tee/tee_shm.c | 2 +- drivers/vfio/vfio_iommu_spapr_tce.c| 3 ++- drivers/vhost/vhost.c | 2 +- drivers/video/fbdev/pvr2fb.c | 2 +- drivers/virt/fsl_hypervisor.c | 2 +- drivers/xen/gntdev.c | 2 +- fs/orangefs/orangefs-bufmap.c | 2 +- include/linux/mm.h | 4 ++-- kernel/futex.c | 2 +- lib/iov_iter.c | 7 +-- mm/gup.c | 10 +- mm/util.c | 8 net/ceph/pagevec.c | 2 +- net/rds/info.c | 2 +- net/rds/rdma.c | 3 ++- 36 files changed, 81 insertions(+), 65 deletions(-) diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 0d14e0d8eacf..4c2b4483683c 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -235,7 +235,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * get_user_pages_fast() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start 
to pin - * @write: whether pages will be written to + * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * @@ -247,8 +247,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * requested. If nr_pages is 0 or negative, returns 0. If no pages * were pinned, returns -errno. */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) +int get_user_pages_fast(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages) { struct mm_struct *mm = current->mm; unsigned long addr, len, end; @@ -273,7 +273,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, next = pgd_addr_end(addr, end); if (pgd_none(pgd)) goto slow; - if (!gup_pud_range(pgd, addr, next, write, pages, )) + if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, + pages, )) goto slow; } while (pgdp++, addr = next, addr != end); local_irq_enable(); @@ -289,7 +290,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, pages += nr; ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, - pages, write ? FOLL_WRITE : 0); + pages, gup_flags); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index bd2dcfbf00cd..8fcb0a921e46 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -582,7 +582,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* If writing != 0, then the HPTE must allow writing, if we get here */ write_ok = writing; hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, writing, pages); + npages = get_user_pages_fast(hva, 1,
[PATCH V2 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
From: Ira Weiny Rather than have a separate get_user_pages_longterm() call, introduce FOLL_LONGTERM and change the longterm callers to use it. This patch does not change any functionality. FOLL_LONGTERM can only be supported with get_user_pages() as it requires vmas to determine if DAX is in use. Signed-off-by: Ira Weiny --- drivers/infiniband/core/umem.c | 5 +- drivers/infiniband/hw/qib/qib_user_pages.c | 8 +- drivers/infiniband/hw/usnic/usnic_uiom.c | 9 +- drivers/media/v4l2-core/videobuf-dma-sg.c | 6 +- drivers/vfio/vfio_iommu_type1.c| 3 +- include/linux/mm.h | 13 +- mm/gup.c | 138 - mm/gup_benchmark.c | 5 +- 8 files changed, 101 insertions(+), 86 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index b69d3efa8712..120a40df91b4 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -185,10 +185,11 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, while (npages) { down_read(>mmap_sem); - ret = get_user_pages_longterm(cur_base, + ret = get_user_pages(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), -gup_flags, page_list, vma_list); +gup_flags | FOLL_LONGTERM, +page_list, vma_list); if (ret < 0) { up_read(>mmap_sem); goto umem_release; diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index ef8bcf366ddc..1b9368261035 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -114,10 +114,10 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages, down_read(>mm->mmap_sem); for (got = 0; got < num_pages; got += ret) { - ret = get_user_pages_longterm(start_page + got * PAGE_SIZE, - num_pages - got, - FOLL_WRITE | FOLL_FORCE, - p + got, NULL); + ret = get_user_pages(start_page + got * PAGE_SIZE, +num_pages - got, +FOLL_LONGTERM | FOLL_WRITE | FOLL_FORCE, +p + got, NULL); if (ret < 0) { up_read(>mm->mmap_sem); goto bail_release; diff --git 
a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 06862a6af185..1d9a182ac163 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -143,10 +143,11 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = 0; while (npages) { - ret = get_user_pages_longterm(cur_base, - min_t(unsigned long, npages, - PAGE_SIZE / sizeof(struct page *)), - gup_flags, page_list, NULL); + ret = get_user_pages(cur_base, +min_t(unsigned long, npages, +PAGE_SIZE / sizeof(struct page *)), +gup_flags | FOLL_LONGTERM, +page_list, NULL); if (ret < 0) goto out; diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index 08929c087e27..870a2a526e0b 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -186,12 +186,12 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n", data, size, dma->nr_pages); - err = get_user_pages_longterm(data & PAGE_MASK, dma->nr_pages, -flags, dma->pages, NULL); + err = get_user_pages(data & PAGE_MASK, dma->nr_pages, +flags | FOLL_LONGTERM, dma->pages, NULL); if (err != dma->nr_pages) { dma->nr_pages = (err >= 0) ? err : 0; - dprintk(1, "get_user_pages_longterm: err=%d [%d]\n", err, + dprintk(1, "get_user_pages: err=%d [%d]\n", err, dma->nr_pages); return err < 0 ? err : -EINVAL; } diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 73652e21efec..1500bd0bb6da 100644 ---
[PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it
From: Ira Weiny NOTE: This series depends on my clean up patch to remove the write parameter from gup_fast_permitted()[1] HFI1, qib, and mthca, use get_user_pages_fast() due to it performance advantages. These pages can be held for a significant time. But get_user_pages_fast() does not protect against mapping of FS DAX pages. Introduce FOLL_LONGTERM and use this flag in get_user_pages_fast() which retains the performance while also adding the FS DAX checks. XDP has also shown interest in using this functionality.[2] In addition we change get_user_pages() to use the new FOLL_LONGTERM flag and remove the specialized get_user_pages_longterm call. [1] https://lkml.org/lkml/2019/2/11/237 [2] https://lkml.org/lkml/2019/2/11/1789 Ira Weiny (7): mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM mm/gup: Change write parameter to flags in fast walk mm/gup: Change GUP fast to use flags rather than a write 'bool' mm/gup: Add FOLL_LONGTERM capability to GUP fast IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast() IB/qib: Use the new FOLL_LONGTERM flag to get_user_pages_fast() IB/mthca: Use the new FOLL_LONGTERM flag to get_user_pages_fast() arch/mips/mm/gup.c | 11 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 +- arch/powerpc/kvm/e500_mmu.c | 2 +- arch/powerpc/mm/mmu_context_iommu.c | 4 +- arch/s390/kvm/interrupt.c | 2 +- arch/s390/mm/gup.c | 12 +- arch/sh/mm/gup.c| 11 +- arch/sparc/mm/gup.c | 9 +- arch/x86/kvm/paging_tmpl.h | 2 +- arch/x86/kvm/svm.c | 2 +- drivers/fpga/dfl-afu-dma-region.c | 2 +- drivers/gpu/drm/via/via_dmablit.c | 3 +- drivers/infiniband/core/umem.c | 5 +- drivers/infiniband/hw/hfi1/user_pages.c | 5 +- drivers/infiniband/hw/mthca/mthca_memfree.c | 3 +- drivers/infiniband/hw/qib/qib_user_pages.c | 8 +- drivers/infiniband/hw/qib/qib_user_sdma.c | 2 +- drivers/infiniband/hw/usnic/usnic_uiom.c| 9 +- drivers/media/v4l2-core/videobuf-dma-sg.c | 6 +- drivers/misc/genwqe/card_utils.c| 2 +- drivers/misc/vmw_vmci/vmci_host.c | 2 +- 
drivers/misc/vmw_vmci/vmci_queue_pair.c | 6 +- drivers/platform/goldfish/goldfish_pipe.c | 3 +- drivers/rapidio/devices/rio_mport_cdev.c| 4 +- drivers/sbus/char/oradax.c | 2 +- drivers/scsi/st.c | 3 +- drivers/staging/gasket/gasket_page_table.c | 4 +- drivers/tee/tee_shm.c | 2 +- drivers/vfio/vfio_iommu_spapr_tce.c | 3 +- drivers/vfio/vfio_iommu_type1.c | 3 +- drivers/vhost/vhost.c | 2 +- drivers/video/fbdev/pvr2fb.c| 2 +- drivers/virt/fsl_hypervisor.c | 2 +- drivers/xen/gntdev.c| 2 +- fs/orangefs/orangefs-bufmap.c | 2 +- include/linux/mm.h | 17 +- kernel/futex.c | 2 +- lib/iov_iter.c | 7 +- mm/gup.c| 220 mm/gup_benchmark.c | 5 +- mm/util.c | 8 +- net/ceph/pagevec.c | 2 +- net/rds/info.c | 2 +- net/rds/rdma.c | 3 +- 44 files changed, 232 insertions(+), 180 deletions(-) -- 2.20.1
[PATCH V2 2/7] mm/gup: Change write parameter to flags in fast walk
From: Ira Weiny In order to support more options in the GUP fast walk, change the write parameter to flags throughout the call stack. This patch does not change functionality and passes FOLL_WRITE where write was previously used. Signed-off-by: Ira Weiny --- mm/gup.c | 52 ++-- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index ee96eaff118c..681388236106 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1417,7 +1417,7 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages) #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, -int write, struct page **pages, int *nr) +unsigned int flags, struct page **pages, int *nr) { struct dev_pagemap *pgmap = NULL; int nr_start = *nr, ret = 0; @@ -1435,7 +1435,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, if (pte_protnone(pte)) goto pte_unmap; - if (!pte_access_permitted(pte, write)) + if (!pte_access_permitted(pte, flags & FOLL_WRITE)) goto pte_unmap; if (pte_devmap(pte)) { @@ -1487,7 +1487,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, * useful to have gup_huge_pmd even if we can't operate on ptes. 
*/ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, -int write, struct page **pages, int *nr) +unsigned int flags, struct page **pages, int *nr) { return 0; } @@ -1570,12 +1570,12 @@ static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr, #endif static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) + unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct page *head, *page; int refs; - if (!pmd_access_permitted(orig, write)) + if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) return 0; if (pmd_devmap(orig)) @@ -1608,12 +1608,12 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, } static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) + unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct page *head, *page; int refs; - if (!pud_access_permitted(orig, write)) + if (!pud_access_permitted(orig, flags & FOLL_WRITE)) return 0; if (pud_devmap(orig)) @@ -1646,13 +1646,13 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, } static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, - unsigned long end, int write, + unsigned long end, unsigned int flags, struct page **pages, int *nr) { int refs; struct page *head, *page; - if (!pgd_access_permitted(orig, write)) + if (!pgd_access_permitted(orig, flags & FOLL_WRITE)) return 0; BUILD_BUG_ON(pgd_devmap(orig)); @@ -1683,7 +1683,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, } static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) + unsigned int flags, struct page **pages, int *nr) { unsigned long next; pmd_t *pmdp; @@ -1705,7 +1705,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, if (pmd_protnone(pmd)) return 0; - 
if (!gup_huge_pmd(pmd, pmdp, addr, next, write, + if (!gup_huge_pmd(pmd, pmdp, addr, next, flags, pages, nr)) return 0; @@ -1715,9 +1715,9 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, * pmd format and THP pmd format */ if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr, -PMD_SHIFT, next, write, pages, nr)) +PMD_SHIFT, next, flags, pages, nr)) return 0; - } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) + } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); @@ -1725,7 +1725,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, } static int
Re: [PATCH 1/5] vfio/type1: use pinned_vm instead of locked_vm to account pinned pages
On Wed, Feb 13, 2019 at 01:03:30PM -0700, Alex Williamson wrote: > > PeterZ posted an RFC that addresses this point[1]. It kept pinned_vm and > > locked_vm accounting separate, but allowed the two to be added safely to be > > compared against RLIMIT_MEMLOCK. > > Unless I'm incorrect in the concerns above, I don't see how we can > convert vfio before this occurs. RDMA was converted to this pinned_vm scheme a long time ago, arguably it is a mistake that VFIO did something different... This was to fix some other bug where reporting of pages was wrong. You are not wrong that this approach doesn't entirely make sense though. :) Jason
[PATCH v4 3/3] locking/rwsem: Optimize down_read_trylock()
Modify __down_read_trylock() to optimize for an unlocked rwsem and make it generate slightly better code. Before this patch, down_read_trylock: 0x <+0>: callq 0x5 0x0005 <+5>: jmp0x18 0x0007 <+7>: lea0x1(%rdx),%rcx 0x000b <+11>:mov%rdx,%rax 0x000e <+14>:lock cmpxchg %rcx,(%rdi) 0x0013 <+19>:cmp%rax,%rdx 0x0016 <+22>:je 0x23 0x0018 <+24>:mov(%rdi),%rdx 0x001b <+27>:test %rdx,%rdx 0x001e <+30>:jns0x7 0x0020 <+32>:xor%eax,%eax 0x0022 <+34>:retq 0x0023 <+35>:mov%gs:0x0,%rax 0x002c <+44>:or $0x3,%rax 0x0030 <+48>:mov%rax,0x20(%rdi) 0x0034 <+52>:mov$0x1,%eax 0x0039 <+57>:retq After patch, down_read_trylock: 0x <+0>: callq 0x5 0x0005 <+5>: xor%eax,%eax 0x0007 <+7>: lea0x1(%rax),%rdx 0x000b <+11>:lock cmpxchg %rdx,(%rdi) 0x0010 <+16>:jne0x29 0x0012 <+18>:mov%gs:0x0,%rax 0x001b <+27>:or $0x3,%rax 0x001f <+31>:mov%rax,0x20(%rdi) 0x0023 <+35>:mov$0x1,%eax 0x0028 <+40>:retq 0x0029 <+41>:test %rax,%rax 0x002c <+44>:jns0x7 0x002e <+46>:xor%eax,%eax 0x0030 <+48>:retq By using a rwsem microbenchmark, the down_read_trylock() rate (with a load of 10 to lengthen the lock critical section) on a x86-64 system before and after the patch were: Before PatchAfter Patch # of Threads rlock rlock - - 1 14,496 14,716 28,644 8,453 46,799 6,983 85,664 7,190 On a ARM64 system, the performance results were: Before PatchAfter Patch # of Threads rlock rlock - - 1 23,676 24,488 27,697 9,502 44,945 3,440 82,641 1,603 For the uncontended case (1 thread), the new down_read_trylock() is a little bit faster. For the contended cases, the new down_read_trylock() perform pretty well in x86-64, but performance degrades at high contention level on ARM64. 
Suggested-by: Linus Torvalds Signed-off-by: Waiman Long --- kernel/locking/rwsem.h | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 45ee002..1f5775a 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -174,14 +174,17 @@ static inline int __down_read_killable(struct rw_semaphore *sem) static inline int __down_read_trylock(struct rw_semaphore *sem) { - long tmp; + /* +* Optimize for the case when the rwsem is not locked at all. +*/ + long tmp = RWSEM_UNLOCKED_VALUE; - while ((tmp = atomic_long_read(&sem->count)) >= 0) { - if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, - tmp + RWSEM_ACTIVE_READ_BIAS)) { + do { + if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { return 1; } - } + } while (tmp >= 0); return 0; } -- 1.8.3.1
[PATCH v4 2/3] locking/rwsem: Remove rwsem-spinlock.c & use rwsem-xadd.c for all archs
Currently, we have two different implementation of rwsem: 1) CONFIG_RWSEM_GENERIC_SPINLOCK (rwsem-spinlock.c) 2) CONFIG_RWSEM_XCHGADD_ALGORITHM (rwsem-xadd.c) As we are going to use a single generic implementation for rwsem-xadd.c and no architecture-specific code will be needed, there is no point in keeping two different implementations of rwsem. In most cases, the performance of rwsem-spinlock.c will be worse. It also doesn't get all the performance tuning and optimizations that had been implemented in rwsem-xadd.c over the years. For simplication, we are going to remove rwsem-spinlock.c and make all architectures use a single implementation of rwsem - rwsem-xadd.c. All references to RWSEM_GENERIC_SPINLOCK and RWSEM_XCHGADD_ALGORITHM in the code are removed. Suggested-by: Peter Zijlstra Signed-off-by: Waiman Long --- arch/alpha/Kconfig | 7 - arch/arc/Kconfig| 3 - arch/arm/Kconfig| 4 - arch/arm64/Kconfig | 3 - arch/c6x/Kconfig| 3 - arch/csky/Kconfig | 3 - arch/h8300/Kconfig | 3 - arch/hexagon/Kconfig| 6 - arch/ia64/Kconfig | 4 - arch/m68k/Kconfig | 7 - arch/microblaze/Kconfig | 6 - arch/mips/Kconfig | 7 - arch/nds32/Kconfig | 3 - arch/nios2/Kconfig | 3 - arch/openrisc/Kconfig | 6 - arch/parisc/Kconfig | 6 - arch/powerpc/Kconfig| 7 - arch/riscv/Kconfig | 3 - arch/s390/Kconfig | 6 - arch/sh/Kconfig | 6 - arch/sparc/Kconfig | 8 - arch/unicore32/Kconfig | 6 - arch/x86/Kconfig| 3 - arch/x86/um/Kconfig | 6 - arch/xtensa/Kconfig | 3 - include/linux/rwsem-spinlock.h | 47 -- include/linux/rwsem.h | 5 - kernel/Kconfig.locks| 2 +- kernel/locking/Makefile | 4 +- kernel/locking/rwsem-spinlock.c | 339 kernel/locking/rwsem.h | 3 - 31 files changed, 2 insertions(+), 520 deletions(-) delete mode 100644 include/linux/rwsem-spinlock.h delete mode 100644 kernel/locking/rwsem-spinlock.c diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 584a6e1..27c8712 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -49,13 +49,6 @@ config MMU bool default y -config 
RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config ARCH_HAS_ILOG2_U32 bool default n diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 376366a..c0dd229 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -63,9 +63,6 @@ config SCHED_OMIT_FRAME_POINTER config GENERIC_CSUM def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool y - config ARCH_DISCONTIGMEM_ENABLE def_bool n diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 664e918..4d81f69 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -176,10 +176,6 @@ config TRACE_IRQFLAGS_SUPPORT bool default !CPU_V7M -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a4168d3..24bbcfa 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -238,9 +238,6 @@ config LOCKDEP_SUPPORT config TRACE_IRQFLAGS_SUPPORT def_bool y -config RWSEM_XCHGADD_ALGORITHM - def_bool y - config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index 456e154..f114655 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -26,9 +26,6 @@ config MMU config FPU def_bool n -config RWSEM_GENERIC_SPINLOCK - def_bool y - config GENERIC_CALIBRATE_DELAY def_bool y diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 398113c..90279a1 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -93,9 +93,6 @@ config GENERIC_HWEIGHT config MMU def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool y - config STACKTRACE_SUPPORT def_bool y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 6472a06..ba33326 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -26,9 +26,6 @@ config H8300 config CPU_BIG_ENDIAN def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index fb2fbfc..5d07c8d 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -64,12 +64,6 @@ config 
GENERIC_CSUM config GENERIC_IRQ_PROBE def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool n - -config RWSEM_XCHGADD_ALGORITHM - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
[PATCH v4 1/3] locking/rwsem: Remove arch specific rwsem files
As the generic rwsem-xadd code is using the appropriate acquire and release versions of the atomic operations, the arch specific rwsem.h files will not be that much faster than the generic code as long as the atomic functions are properly implemented. So we can remove those arch specific rwsem.h and stop building asm/rwsem.h to reduce maintenance effort. Currently, only x86, alpha and ia64 have implemented architecture specific fast paths. I don't have access to alpha and ia64 systems for testing, but they are legacy systems that are not likely to be updated to the latest kernel anyway. By using a rwsem microbenchmark, the total locking rates on a 4-socket 56-core 112-thread x86-64 system before and after the patch were as follows (mixed means equal # of read and write locks): Before Patch After Patch # of Threads wlock rlock mixed wlock rlock mixed - - - - - - 129,201 30,143 29,45828,615 30,172 29,201 2 6,807 13,299 1,171 7,725 15,025 1,804 4 6,504 12,755 1,520 7,127 14,286 1,345 8 6,762 13,412 764 6,826 13,652 726 16 6,693 15,408 662 6,599 15,938 626 32 6,145 15,286 496 5,549 15,487 511 64 5,812 15,495 60 5,858 15,572 60 There were some run-to-run variations for the multi-thread tests. For x86-64, using the generic C code fast path seems to be a little bit faster than the assembly version with low lock contention. Looking at the assembly version of the fast paths, there are assembly to/from C code wrappers that save and restore all the callee-clobbered registers (7 registers on x86-64). The assembly generated from the generic C code doesn't need to do that. That may explain the slight performance gain here. The generic asm rwsem.h can also be merged into kernel/locking/rwsem.h with no code change as no other code other than those under kernel/locking needs to access the internal rwsem macros and functions. 
Signed-off-by: Waiman Long --- MAINTAINERS | 1 - arch/alpha/include/asm/rwsem.h | 211 --- arch/arm/include/asm/Kbuild | 1 - arch/arm64/include/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 - arch/ia64/include/asm/rwsem.h | 172 - arch/powerpc/include/asm/Kbuild | 1 - arch/s390/include/asm/Kbuild| 1 - arch/sh/include/asm/Kbuild | 1 - arch/sparc/include/asm/Kbuild | 1 - arch/x86/include/asm/rwsem.h| 237 arch/x86/lib/Makefile | 1 - arch/x86/lib/rwsem.S| 156 -- arch/x86/um/Makefile| 1 - arch/xtensa/include/asm/Kbuild | 1 - include/asm-generic/rwsem.h | 140 include/linux/rwsem.h | 4 +- kernel/locking/percpu-rwsem.c | 2 + kernel/locking/rwsem.h | 130 ++ 19 files changed, 133 insertions(+), 930 deletions(-) delete mode 100644 arch/alpha/include/asm/rwsem.h delete mode 100644 arch/ia64/include/asm/rwsem.h delete mode 100644 arch/x86/include/asm/rwsem.h delete mode 100644 arch/x86/lib/rwsem.S delete mode 100644 include/asm-generic/rwsem.h diff --git a/MAINTAINERS b/MAINTAINERS index 9919840..053f536 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8926,7 +8926,6 @@ F:arch/*/include/asm/spinlock*.h F: include/linux/rwlock*.h F: include/linux/mutex*.h F: include/linux/rwsem*.h -F: arch/*/include/asm/rwsem.h F: include/linux/seqlock.h F: lib/locking*.[ch] F: kernel/locking/ diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h deleted file mode 100644 index cf8fc8f9..000 --- a/arch/alpha/include/asm/rwsem.h +++ /dev/null @@ -1,211 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ALPHA_RWSEM_H -#define _ALPHA_RWSEM_H - -/* - * Written by Ivan Kokshaysky , 2001. 
- * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h - */ - -#ifndef _LINUX_RWSEM_H -#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" -#endif - -#ifdef __KERNEL__ - -#include <linux/compiler.h> - -#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L -#define RWSEM_ACTIVE_BIAS 0x0000000000000001L -#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL -#define RWSEM_WAITING_BIAS (-0x0000000100000000L) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -static inline int ___down_read(struct rw_semaphore *sem) -{ - long oldcount; -#ifndefCONFIG_SMP - oldcount = sem->count.counter; - sem->count.counter += RWSEM_ACTIVE_READ_BIAS; -#else - long temp; - __asm__ __volatile__( -
[PATCH v4 0/3] locking/rwsem: Rwsem rearchitecture part 0
v4: - Remove rwsem-spinlock.c and make all archs use rwsem-xadd.c. v3: - Optimize __down_read_trylock() for the uncontended case as suggested by Linus. v2: - Add patch 2 to optimize __down_read_trylock() as suggested by PeterZ. - Update performance test data in patch 1. The goal of this patchset is to remove the architecture specific files for rwsem-xadd to make it easer to add enhancements in the later rwsem patches. It also removes the legacy rwsem-spinlock.c file and make all the architectures use one single implementation of rwsem - rwsem-xadd.c. Waiman Long (3): locking/rwsem: Remove arch specific rwsem files locking/rwsem: Remove rwsem-spinlock.c & use rwsem-xadd.c for all archs locking/rwsem: Optimize down_read_trylock() MAINTAINERS | 1 - arch/alpha/Kconfig | 7 - arch/alpha/include/asm/rwsem.h | 211 - arch/arc/Kconfig| 3 - arch/arm/Kconfig| 4 - arch/arm/include/asm/Kbuild | 1 - arch/arm64/Kconfig | 3 - arch/arm64/include/asm/Kbuild | 1 - arch/c6x/Kconfig| 3 - arch/csky/Kconfig | 3 - arch/h8300/Kconfig | 3 - arch/hexagon/Kconfig| 6 - arch/hexagon/include/asm/Kbuild | 1 - arch/ia64/Kconfig | 4 - arch/ia64/include/asm/rwsem.h | 172 arch/m68k/Kconfig | 7 - arch/microblaze/Kconfig | 6 - arch/mips/Kconfig | 7 - arch/nds32/Kconfig | 3 - arch/nios2/Kconfig | 3 - arch/openrisc/Kconfig | 6 - arch/parisc/Kconfig | 6 - arch/powerpc/Kconfig| 7 - arch/powerpc/include/asm/Kbuild | 1 - arch/riscv/Kconfig | 3 - arch/s390/Kconfig | 6 - arch/s390/include/asm/Kbuild| 1 - arch/sh/Kconfig | 6 - arch/sh/include/asm/Kbuild | 1 - arch/sparc/Kconfig | 8 - arch/sparc/include/asm/Kbuild | 1 - arch/unicore32/Kconfig | 6 - arch/x86/Kconfig| 3 - arch/x86/include/asm/rwsem.h| 237 arch/x86/lib/Makefile | 1 - arch/x86/lib/rwsem.S| 156 -- arch/x86/um/Kconfig | 6 - arch/x86/um/Makefile| 1 - arch/xtensa/Kconfig | 3 - arch/xtensa/include/asm/Kbuild | 1 - include/asm-generic/rwsem.h | 140 - include/linux/rwsem-spinlock.h | 47 -- include/linux/rwsem.h | 9 +- kernel/Kconfig.locks| 2 +- 
kernel/locking/Makefile | 4 +- kernel/locking/percpu-rwsem.c | 2 + kernel/locking/rwsem-spinlock.c | 339 kernel/locking/rwsem.h | 130 +++ 48 files changed, 135 insertions(+), 1447 deletions(-) delete mode 100644 arch/alpha/include/asm/rwsem.h delete mode 100644 arch/ia64/include/asm/rwsem.h delete mode 100644 arch/x86/include/asm/rwsem.h delete mode 100644 arch/x86/lib/rwsem.S delete mode 100644 include/asm-generic/rwsem.h delete mode 100644 include/linux/rwsem-spinlock.h delete mode 100644 kernel/locking/rwsem-spinlock.c -- 1.8.3.1
Re: [PATCH v4 1/2] dt-bindings: soc: fsl: Document Qixis FPGA usage
On Tue, Feb 05, 2019 at 10:14:40AM +, Pankaj Bansal wrote: > an FPGA-based system controller, called “Qixis”, which > manages several critical system features, including: > • Reset sequencing > • Power supply configuration > • Board configuration > • hardware configuration > > The qixis registers are accessible over one or more system-specific > interfaces, typically I2C, JTAG or an embedded processor. > > Signed-off-by: Pankaj Bansal > --- > > Notes: > V4: > - No Change > V3: > - Added boardname based compatible field in bindings > - Added bindings for MMIO based FPGA > V2: > - No change > > .../bindings/soc/fsl/qixis_ctrl.txt | 53 ++ > 1 file changed, 53 insertions(+) > > diff --git a/Documentation/devicetree/bindings/soc/fsl/qixis_ctrl.txt > b/Documentation/devicetree/bindings/soc/fsl/qixis_ctrl.txt > new file mode 100644 > index ..5d510df14be8 > --- /dev/null > +++ b/Documentation/devicetree/bindings/soc/fsl/qixis_ctrl.txt > @@ -0,0 +1,53 @@ > +* QIXIS FPGA block > + > +an FPGA-based system controller, called “Qixis”, which > +manages several critical system features, including: > +• Configuration switch monitoring > +• Power on/off sequencing > +• Reset sequencing > +• Power supply configuration > +• Board configuration > +• hardware configuration > +• Background power data collection (DCM) > +• Fault monitoring > +• RCW bypass SRAM (replace flash RCW with internal RCW) (NOR only) > +• Dedicated functional validation blocks (POSt/IRS, triggered event, and so > on) > +• I2C master for remote board control even with no DUT available > + > +The qixis registers are accessible over one or more system-specific > interfaces, > +typically I2C, JTAG or an embedded processor. > + > +FPGA connected to I2C: > +Required properties: > + > + - compatible: should be a board-specific string followed by a string > + indicating the type of FPGA. 
Example: > + "fsl,-fpga", "fsl,fpga-qixis-i2c" You don't really need the '-i2c' part because it will only get bound to an i2c based driver when a child of an i2c controller. > + - reg : i2c address of the qixis device. > + > +Example (LX2160A-QDS): > + /* The FPGA node */ > +fpga@66 { > + compatible = "fsl,lx2160aqds-fpga", "fsl,fpga-qixis-i2c"; > + reg = <0x66>; > + #address-cells = <1>; > + #size-cells = <0>; You don't need this unless you have child nodes with 'reg'. > + } > + > +* Freescale on-board FPGA > + > +This is the memory-mapped registers for on board FPGA. > + > +Required properties: > +- compatible: should be a board-specific string followed by a string > + indicating the type of FPGA. Example: > + "fsl,-fpga", "fsl,fpga-qixis" > +- reg: should contain the address and the length of the FPGA register set. > + > +Example (LS2080A-RDB): > + > +cpld@3,0 { > +compatible = "fsl,ls2080ardb-fpga", "fsl,fpga-qixis"; > +reg = <0x3 0 0x1>; > +}; > + > -- > 2.17.1 >
Re: [QUESTION] powerpc, libseccomp, and spu
On Tue, Feb 12, 2019 at 9:50 AM Tom Hromatka wrote: > On 2/11/19 11:54 AM, Tom Hromatka wrote: > > PowerPC experts, > > > > Paul Moore and I are working on the v2.4 release of libseccomp, > > and as part of this work I need to update the syscall table for > > each architecture. > > > > I have incorporated the new ppc syscall.tbl into libseccomp, but > > I am not familiar with the value of "spu" in the ABI column. For > > example: > > > > 2232umountsys_oldumount > > 2264umountsys_ni_syscall > > 22spuumountsys_ni_syscall > > > > In libseccomp, we maintain a 32-bit ppc syscall table and a 64-bit > > ppc syscall table. Do we also need to add a "spu" ppc syscall > > table? Some clarification on the syscalls marked "spu" and "nospu" > > would be greatly appreciated. > > Thanks for the awesome responses, Ben and Michael. I'll definitely > get Paul's input as well, but it sounds reasonable to exclude SPUs > from the newest libseccomp release. Based on this thread, I don't think we need to worry about "spu" at this point in time. Thanks everyone. > Michael's recommendation to replace "nospu" with common" and ignore > "spu" entirely has allowed ppc and ppc64 to pass all of our internal > checks. > > Thanks again! > > Tom -- paul moore www.paul-moore.com
[PATCH v3 2/2] locking/rwsem: Optimize down_read_trylock()
Modify __down_read_trylock() to optimize for an unlocked rwsem and make it generate slightly better code. Before this patch, down_read_trylock: 0x <+0>: callq 0x5 0x0005 <+5>: jmp0x18 0x0007 <+7>: lea0x1(%rdx),%rcx 0x000b <+11>:mov%rdx,%rax 0x000e <+14>:lock cmpxchg %rcx,(%rdi) 0x0013 <+19>:cmp%rax,%rdx 0x0016 <+22>:je 0x23 0x0018 <+24>:mov(%rdi),%rdx 0x001b <+27>:test %rdx,%rdx 0x001e <+30>:jns0x7 0x0020 <+32>:xor%eax,%eax 0x0022 <+34>:retq 0x0023 <+35>:mov%gs:0x0,%rax 0x002c <+44>:or $0x3,%rax 0x0030 <+48>:mov%rax,0x20(%rdi) 0x0034 <+52>:mov$0x1,%eax 0x0039 <+57>:retq After patch, down_read_trylock: 0x <+0>: callq 0x5 0x0005 <+5>: xor%eax,%eax 0x0007 <+7>: lea0x1(%rax),%rdx 0x000b <+11>:lock cmpxchg %rdx,(%rdi) 0x0010 <+16>:jne0x29 0x0012 <+18>:mov%gs:0x0,%rax 0x001b <+27>:or $0x3,%rax 0x001f <+31>:mov%rax,0x20(%rdi) 0x0023 <+35>:mov$0x1,%eax 0x0028 <+40>:retq 0x0029 <+41>:test %rax,%rax 0x002c <+44>:jns0x7 0x002e <+46>:xor%eax,%eax 0x0030 <+48>:retq By using a rwsem microbenchmark, the down_read_trylock() rate (with a load of 10 to lengthen the lock critical section) on a x86-64 system before and after the patch were: Before PatchAfter Patch # of Threads rlock rlock - - 1 14,496 14,716 28,644 8,453 46,799 6,983 85,664 7,190 On a ARM64 system, the performance results were: Before PatchAfter Patch # of Threads rlock rlock - - 1 23,676 24,488 27,697 9,502 44,945 3,440 82,641 1,603 For the uncontended case (1 thread), the new down_read_trylock() is a little bit faster. For the contended cases, the new down_read_trylock() perform pretty well in x86-64, but performance degrades at high contention level on ARM64. 
Suggested-by: Linus Torvalds Signed-off-by: Waiman Long --- kernel/locking/rwsem.h | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 067e265..e0bcc11 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -175,14 +175,17 @@ static inline int __down_read_killable(struct rw_semaphore *sem) static inline int __down_read_trylock(struct rw_semaphore *sem) { - long tmp; + /* +* Optimize for the case when the rwsem is not locked at all. +*/ + long tmp = RWSEM_UNLOCKED_VALUE; - while ((tmp = atomic_long_read(&sem->count)) >= 0) { - if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, - tmp + RWSEM_ACTIVE_READ_BIAS)) { + do { + if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { return 1; } - } + } while (tmp >= 0); return 0; } -- 1.8.3.1
[PATCH v3 1/2] locking/rwsem: Remove arch specific rwsem files
As the generic rwsem-xadd code is using the appropriate acquire and release versions of the atomic operations, the arch specific rwsem.h files will not be that much faster than the generic code as long as the atomic functions are properly implemented. So we can remove those arch specific rwsem.h and stop building asm/rwsem.h to reduce maintenance effort. Currently, only x86, alpha and ia64 have implemented architecture specific fast paths. I don't have access to alpha and ia64 systems for testing, but they are legacy systems that are not likely to be updated to the latest kernel anyway. By using a rwsem microbenchmark, the total locking rates on a 4-socket 56-core 112-thread x86-64 system before and after the patch were as follows (mixed means equal # of read and write locks): Before Patch After Patch # of Threads wlock rlock mixed wlock rlock mixed - - - - - - 129,201 30,143 29,45828,615 30,172 29,201 2 6,807 13,299 1,171 7,725 15,025 1,804 4 6,504 12,755 1,520 7,127 14,286 1,345 8 6,762 13,412 764 6,826 13,652 726 16 6,693 15,408 662 6,599 15,938 626 32 6,145 15,286 496 5,549 15,487 511 64 5,812 15,495 60 5,858 15,572 60 There were some run-to-run variations for the multi-thread tests. For x86-64, using the generic C code fast path seems to be a little bit faster than the assembly version with low lock contention. Looking at the assembly version of the fast paths, there are assembly to/from C code wrappers that save and restore all the callee-clobbered registers (7 registers on x86-64). The assembly generated from the generic C code doesn't need to do that. That may explain the slight performance gain here. The generic asm rwsem.h can also be merged into kernel/locking/rwsem.h with no code change as no other code other than those under kernel/locking needs to access the internal rwsem macros and functions. 
Signed-off-by: Waiman Long --- MAINTAINERS | 1 - arch/alpha/include/asm/rwsem.h | 211 --- arch/arm/include/asm/Kbuild | 1 - arch/arm64/include/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 - arch/ia64/include/asm/rwsem.h | 172 - arch/powerpc/include/asm/Kbuild | 1 - arch/s390/include/asm/Kbuild| 1 - arch/sh/include/asm/Kbuild | 1 - arch/sparc/include/asm/Kbuild | 1 - arch/x86/include/asm/rwsem.h| 237 arch/x86/lib/Makefile | 1 - arch/x86/lib/rwsem.S| 156 -- arch/xtensa/include/asm/Kbuild | 1 - include/asm-generic/rwsem.h | 140 include/linux/rwsem.h | 4 +- kernel/locking/percpu-rwsem.c | 2 + kernel/locking/rwsem.h | 130 ++ 18 files changed, 133 insertions(+), 929 deletions(-) delete mode 100644 arch/alpha/include/asm/rwsem.h delete mode 100644 arch/ia64/include/asm/rwsem.h delete mode 100644 arch/x86/include/asm/rwsem.h delete mode 100644 arch/x86/lib/rwsem.S delete mode 100644 include/asm-generic/rwsem.h diff --git a/MAINTAINERS b/MAINTAINERS index 9919840..053f536 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8926,7 +8926,6 @@ F:arch/*/include/asm/spinlock*.h F: include/linux/rwlock*.h F: include/linux/mutex*.h F: include/linux/rwsem*.h -F: arch/*/include/asm/rwsem.h F: include/linux/seqlock.h F: lib/locking*.[ch] F: kernel/locking/ diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h deleted file mode 100644 index cf8fc8f9..000 --- a/arch/alpha/include/asm/rwsem.h +++ /dev/null @@ -1,211 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ALPHA_RWSEM_H -#define _ALPHA_RWSEM_H - -/* - * Written by Ivan Kokshaysky , 2001. 
- * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h - */ - -#ifndef _LINUX_RWSEM_H -#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" -#endif - -#ifdef __KERNEL__ - -#include <linux/compiler.h> - -#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L -#define RWSEM_ACTIVE_BIAS 0x0000000000000001L -#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL -#define RWSEM_WAITING_BIAS (-0x0000000100000000L) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -static inline int ___down_read(struct rw_semaphore *sem) -{ - long oldcount; -#ifndefCONFIG_SMP - oldcount = sem->count.counter; - sem->count.counter += RWSEM_ACTIVE_READ_BIAS; -#else - long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - "
[PATCH v3 0/2] locking/rwsem: Remove arch specific rwsem files
v3: - Optimize __down_read_trylock() for the uncontended case as suggested by Linus. v2: - Add patch 2 to optimize __down_read_trylock() as suggested by PeterZ. - Update performance test data in patch 1. This is part 0 of my rwsem patchset. It just removes the architecture specific files to make it easier to add enhancements in the upcoming rwsem patches. Since the two ll/sc platforms that I have tested on (arm64 & ppc) are both using the generic C codes, the rwsem performance shouldn't be affected by this patch except the down_read_trylock() code which was included in patch 2 for arm64. Waiman Long (2): locking/rwsem: Remove arch specific rwsem files locking/rwsem: Optimize down_read_trylock() MAINTAINERS | 1 - arch/alpha/include/asm/rwsem.h | 211 --- arch/arm/include/asm/Kbuild | 1 - arch/arm64/include/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 - arch/ia64/include/asm/rwsem.h | 172 - arch/powerpc/include/asm/Kbuild | 1 - arch/s390/include/asm/Kbuild| 1 - arch/sh/include/asm/Kbuild | 1 - arch/sparc/include/asm/Kbuild | 1 - arch/x86/include/asm/rwsem.h| 237 arch/x86/lib/Makefile | 1 - arch/x86/lib/rwsem.S| 156 -- arch/xtensa/include/asm/Kbuild | 1 - include/asm-generic/rwsem.h | 140 include/linux/rwsem.h | 4 +- kernel/locking/percpu-rwsem.c | 2 + kernel/locking/rwsem.h | 133 ++ 18 files changed, 136 insertions(+), 929 deletions(-) delete mode 100644 arch/alpha/include/asm/rwsem.h delete mode 100644 arch/ia64/include/asm/rwsem.h delete mode 100644 arch/x86/include/asm/rwsem.h delete mode 100644 arch/x86/lib/rwsem.S delete mode 100644 include/asm-generic/rwsem.h -- 1.8.3.1
Re: [PATCH 1/5] vfio/type1: use pinned_vm instead of locked_vm to account pinned pages
On Tue, 12 Feb 2019 19:26:50 -0500 Daniel Jordan wrote: > On Tue, Feb 12, 2019 at 11:41:10AM -0700, Alex Williamson wrote: > > Daniel Jordan wrote: > > > On Mon, Feb 11, 2019 at 03:56:20PM -0700, Jason Gunthorpe wrote: > > > > I haven't looked at this super closely, but how does this stuff work? > > > > > > > > do_mlock doesn't touch pinned_vm, and this doesn't touch locked_vm... > > > > > > > > Shouldn't all this be 'if (locked_vm + pinned_vm < RLIMIT_MEMLOCK)' ? > > > > > > > > Otherwise MEMLOCK is really doubled.. > > > > > > So this has been a problem for some time, but it's not as easy as adding > > > them > > > together, see [1][2] for a start. > > > > > > The locked_vm/pinned_vm issue definitely needs fixing, but all this > > > series is > > > trying to do is account to the right counter. > > Thanks for taking a look, Alex. > > > This still makes me nervous because we have userspace dependencies on > > setting process locked memory. > > Could you please expand on this? Trying to get more context. VFIO is a userspace driver interface and the pinned/locked page accounting we're doing here is trying to prevent a user from exceeding their locked memory limits. Thus a VM management tool or unprivileged userspace driver needs to have appropriate locked memory limits configured for their use case. Currently we do not have a unified accounting scheme, so if a page is mlock'd by the user and also mapped through VFIO for DMA, it's accounted twice, these both increment locked_vm and userspace needs to manage that. If pinned memory and locked memory are now two separate buckets and we're only comparing one of them against the locked memory limit, then it seems we have effectively doubled the user's locked memory for this use case, as Jason questioned. The user could mlock one page and DMA map another, they're both "locked", but now they only take one slot in each bucket. 
If we continue forward with using a separate bucket here, userspace could infer that accounting is unified and lower the user's locked memory limit, or exploit the gap that their effective limit might actually exceed system memory. In the former case, if we do eventually correct to compare the total of the combined buckets against the user's locked memory limits, we'll break users that have adapted their locked memory limits to meet the apparent needs. In the latter case, the inconsistent accounting is potentially an attack vector. > > There's a user visible difference if we > > account for them in the same bucket vs separate. Perhaps we're > > counting in the wrong bucket now, but if we "fix" that and userspace > > adapts, how do we ever go back to accounting both mlocked and pinned > > memory combined against rlimit? Thanks, > > PeterZ posted an RFC that addresses this point[1]. It kept pinned_vm and > locked_vm accounting separate, but allowed the two to be added safely to be > compared against RLIMIT_MEMLOCK. Unless I'm incorrect in the concerns above, I don't see how we can convert vfio before this occurs. > Anyway, until some solution is agreed on, are there objections to converting > locked_vm to an atomic, to avoid user-visible changes, instead of switching > locked_vm users to pinned_vm? Seems that as long as we have separate buckets that are compared individually to rlimit that we've got problems, it's just a matter of where they're exposed based on which bucket is used for which interface. Thanks, Alex
Re: [PATCH-tip 00/22] locking/rwsem: Rework rwsem-xadd & enable new rwsem features
Ok, those test robot reports are hard to read, but trying to distill it down: On Wed, Feb 13, 2019 at 1:19 AM Chen Rong wrote: > > %stddev %change %stddev > \ |\ > 196250 ± 8% -64.1% 70494will-it-scale.per_thread_ops That's the original 64% regression.. And then with the patch set: > %stddev change %stddev > \ |\ > 71190 180% 199232 ± 4% will-it-scale.per_thread_ops looks like it's back up where it used to be. So I guess we have numbers for the regression now. Thanks. And that closes my biggest question for the new model, and with the new organization that gets rid of the arch-specific asm separately first and makes it a bit more legible that way, I guess I'll just Ack the whole series. Linus
Re: [PATCH v2] hugetlb: allow to free gigantic pages regardless of the configuration
> -#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || > defined(CONFIG_CMA) > +#ifdef CONFIG_COMPACTION_CORE > static __init int gigantic_pages_init(void) > { > /* With compaction or CMA we can allocate gigantic pages at runtime */ > diff --git a/fs/Kconfig b/fs/Kconfig > index ac474a61be37..8fecd3ea5563 100644 > --- a/fs/Kconfig > +++ b/fs/Kconfig > @@ -207,8 +207,9 @@ config HUGETLB_PAGE > config MEMFD_CREATE > def_bool TMPFS || HUGETLBFS > > -config ARCH_HAS_GIGANTIC_PAGE > +config COMPACTION_CORE > bool > + default y if (MEMORY_ISOLATION && MIGRATION) || CMA This takes a hard dependency (#if) and turns it into a Kconfig *default* that can be overridden. That seems like trouble. Shouldn't it be: config COMPACTION_CORE def_bool y depends on (MEMORY_ISOLATION && MIGRATION) || CMA ?
[PATCH v2] hugetlb: allow to free gigantic pages regardless of the configuration
On systems without CMA or (MEMORY_ISOLATION && COMPACTION) activated but that support gigantic pages, boottime reserved gigantic pages can not be freed at all. This patch simply enables the possibility to hand back those pages to memory allocator. This patch also renames: - the triplet CMA or (MEMORY_ISOLATION && COMPACTION) into COMPACTION_CORE, and gets rid of all use of it in architecture specific code (and then removes ARCH_HAS_GIGANTIC_PAGE config). - gigantic_page_supported to make it more accurate: this value being false does not mean that the system cannot use gigantic pages, it just means that runtime allocation of gigantic pages is not supported, one can still allocate boottime gigantic pages if the architecture supports it. Signed-off-by: Alexandre Ghiti --- Changes in v2 as suggested by Vlastimil Babka: - Get rid of ARCH_HAS_GIGANTIC_PAGE - Get rid of architecture specific gigantic_page_supported - Factorize CMA or (MEMORY_ISOLATION && COMPACTION) into COMPACTION_CORE Compiles on all arches and validated on riscv. 
arch/arm64/Kconfig | 1 - arch/arm64/include/asm/hugetlb.h | 4 -- arch/powerpc/include/asm/book3s/64/hugetlb.h | 7 arch/powerpc/platforms/Kconfig.cputype | 1 - arch/s390/Kconfig| 1 - arch/s390/include/asm/hugetlb.h | 3 -- arch/x86/Kconfig | 1 - arch/x86/include/asm/hugetlb.h | 4 -- arch/x86/mm/hugetlbpage.c| 2 +- fs/Kconfig | 3 +- include/linux/gfp.h | 4 +- mm/hugetlb.c | 44 +++- mm/page_alloc.c | 7 ++-- 13 files changed, 32 insertions(+), 50 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a4168d366127..6c778046b9f7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -18,7 +18,6 @@ config ARM64 select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA select ARCH_HAS_KCOV select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PTE_SPECIAL diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index fb6609875455..59893e766824 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -65,8 +65,4 @@ extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, #include -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) { return true; } -#endif - #endif /* __ASM_HUGETLB_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index 5b0177733994..d04a0bcc2f1c 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -32,13 +32,6 @@ static inline int hstate_get_psize(struct hstate *hstate) } } -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) -{ - return true; -} -#endif - /* hugepd entry valid bit */ #define HUGEPD_VAL_BITS(0x8000UL) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 8c7464c3f27f..3e629dfb5efa 100644 --- 
a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -319,7 +319,6 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 - select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ed554b09eb3f..556860f290e9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -69,7 +69,6 @@ config S390 select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA select ARCH_HAS_KCOV select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_MEMORY diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 2d1afa58a4b6..bd191560efcf 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -116,7 +116,4 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) return pte_modify(pte, newprot); } -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) { return true; } -#endif #endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 68261430fe6e..2fd983e2b2f6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,7 +23,6 @@ config X86_64
Re: [PATCH] hugetlb: allow to free gigantic pages regardless of the configuration
On 2/13/19 6:27 AM, Vlastimil Babka wrote: On 1/17/19 7:39 PM, Alexandre Ghiti wrote: From: Alexandre Ghiti On systems without CMA or (MEMORY_ISOLATION && COMPACTION) activated but that support gigantic pages, boottime reserved gigantic pages can not be freed at all. This patchs simply enables the possibility to hand back those pages to memory allocator. This commit then renames gigantic_page_supported and ARCH_HAS_GIGANTIC_PAGE to make them more accurate. Indeed, those values being false does not mean that the system cannot use gigantic pages: it just means that runtime allocation of gigantic pages is not supported, one can still allocate boottime gigantic pages if the architecture supports it. Signed-off-by: Alexandre Ghiti I'm fine with the change, but wonder if this can be structured better in a way which would remove the duplicated "if (MEMORY_ISOLATION && COMPACTION) || CMA" from all arches, as well as the duplicated gigantic_page_runtime_allocation_supported() Yeah, totally, we can factorize more than what I did. I prepared a v2 of this patch that does exactly that: remove the triplet from arch specific code and the duplicated gigantic_page_runtime_allocation_supported. something like: - "select ARCH_HAS_GIGANTIC_PAGE" has no conditions, it just says the arch can support them either at boottime or runtime (but runtime is usable only if other conditions are met) And the v2 gets rid of ARCH_HAS_GIGANTIC_PAGE totally since it is not needed by arch to advertise the fact they support gigantic page, actually, when selected, it really just means that an arch has the means to allocate runtime gigantic page: it is equivalent to (MEMORY_ISOLATION && COMPACTION) || CMA. - gigantic_page_runtime_allocation_supported() is a function that returns true if ARCH_HAS_GIGANTIC_PAGE && ((MEMORY_ISOLATION && COMPACTION) || CMA) and there's a single instance, not per-arch. 
- code for freeing gigantic pages can probably still be conditional on ARCH_HAS_GIGANTIC_PAGE BTW I wanted also to do something about the "(MEMORY_ISOLATION && COMPACTION) || CMA" ugliness itself, i.e. put the common parts behind some new kconfig (COMPACTION_CORE ?) and expose it better to users, but I can take a stab on that once the above part is settled. Vlastimil I send the v2 right away, if you can take a look Vlastimil, that would be great. Note that Andrew already picked this patch in its tree, I'm not sure how to proceed. Thanks for your remarks ! Alex
Re: [PATCH 06/12] dma-mapping: improve selection of dma_declare_coherent availability
On Wed, Feb 13, 2019 at 12:24 PM Christoph Hellwig wrote: > > On Tue, Feb 12, 2019 at 02:40:23PM -0600, Rob Herring wrote: > > > diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig > > > index 3607fd2810e4..f8c66a9472a4 100644 > > > --- a/drivers/of/Kconfig > > > +++ b/drivers/of/Kconfig > > > @@ -43,6 +43,7 @@ config OF_FLATTREE > > > > > > config OF_EARLY_FLATTREE > > > bool > > > + select DMA_DECLARE_COHERENT > > > > Is selecting DMA_DECLARE_COHERENT okay on UML? We run the unittests with > > UML. > > No, that will fail with undefined references to memunmap. > > I gues this needs to be > > select DMA_DECLARE_COHERENT if HAS_DMA > > > Maybe we should just get rid of OF_RESERVED_MEM. If we support booting > > from DT, then it should always be enabled anyways. > > Fine with me. Do you want me to respin the series to just remove > it? Either now or it can wait. I don't want to hold this up any. Rob
Re: [PATCH 06/12] dma-mapping: improve selection of dma_declare_coherent availability
On Tue, Feb 12, 2019 at 02:40:23PM -0600, Rob Herring wrote: > > diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig > > index 3607fd2810e4..f8c66a9472a4 100644 > > --- a/drivers/of/Kconfig > > +++ b/drivers/of/Kconfig > > @@ -43,6 +43,7 @@ config OF_FLATTREE > > > > config OF_EARLY_FLATTREE > > bool > > + select DMA_DECLARE_COHERENT > > Is selecting DMA_DECLARE_COHERENT okay on UML? We run the unittests with UML. No, that will fail with undefined references to memunmap. I guess this needs to be select DMA_DECLARE_COHERENT if HAS_DMA > Maybe we should just get rid of OF_RESERVED_MEM. If we support booting > from DT, then it should always be enabled anyways. Fine with me. Do you want me to respin the series to just remove it?
Re: [PATCH 01/12] mfd/sm501: depend on HAS_DMA
On Wed, Feb 13, 2019 at 07:29:31AM +, Lee Jones wrote: > I would normally have taken this, but I fear it will conflict with > [PATCH 06/12]. For that reason, just take my: > > Acked-by: Lee Jones Yes, I'll need it for the later patches in the series. Thanks for the review.
Re: [PATCH 03/12] of: mark early_init_dt_alloc_reserved_memory_arch static
On Tue, Feb 12, 2019 at 02:24:19PM -0600, Rob Herring wrote: > Looks like this one isn't a dependency, so I can take it if you want. Sure, please go ahead.
[PATCH 11/11] s390: don't redefine the HAS_IOMEM symbol
Rely on the common definition instead. Signed-off-by: Christoph Hellwig --- arch/s390/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9a25e19364f5..0f62e33ffcb2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -159,6 +159,7 @@ config S390 select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PCI select NEED_SG_DMA_LENGTH if PCI + select NO_IOMEM if !PCI select OLD_SIGACTION select OLD_SIGSUSPEND3 select PCI_DOMAINS if PCI @@ -708,9 +709,6 @@ config PCI_NR_FUNCTIONS endif # PCI -config HAS_IOMEM - def_bool PCI - config CHSC_SCH def_tristate m prompt "Support for CHSC subchannels" -- 2.20.1
[PATCH 10/11] lib: consolidate the GENERIC_HWEIGHT symbol
Introduce a new ARCH_HAS_HWEIGHT symbol for alpha and ia64, and just default to the generic version otherwise. Signed-off-by: Christoph Hellwig --- arch/alpha/Kconfig | 5 + arch/arc/Kconfig| 3 --- arch/arm/Kconfig| 4 arch/arm64/Kconfig | 3 --- arch/c6x/Kconfig| 3 --- arch/csky/Kconfig | 3 --- arch/h8300/Kconfig | 3 --- arch/hexagon/Kconfig| 3 --- arch/ia64/Kconfig | 1 + arch/m68k/Kconfig | 4 arch/microblaze/Kconfig | 3 --- arch/mips/Kconfig | 4 arch/nds32/Kconfig | 3 --- arch/nios2/Kconfig | 3 --- arch/openrisc/Kconfig | 3 --- arch/parisc/Kconfig | 4 arch/powerpc/Kconfig| 4 arch/riscv/Kconfig | 3 --- arch/s390/Kconfig | 3 --- arch/sh/Kconfig | 3 --- arch/sparc/Kconfig | 4 arch/unicore32/Kconfig | 3 --- arch/x86/Kconfig| 3 --- arch/x86/um/Kconfig | 3 --- arch/xtensa/Kconfig | 3 --- lib/Kconfig | 7 +++ 26 files changed, 9 insertions(+), 79 deletions(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 65b5514e5a7f..a549c53563e8 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -2,6 +2,7 @@ config ALPHA bool default y + select ARCH_HAS_HWEIGHT if ALPHA_EV67 select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_NO_PREEMPT @@ -446,10 +447,6 @@ config ALPHA_IRONGATE depends on ALPHA_NAUTILUS default y -config GENERIC_HWEIGHT - bool - default y if !ALPHA_EV67 - config ALPHA_AVANTI bool depends on ALPHA_XL || ALPHA_AVANTI_CH diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 6476404b98b8..8bf4c0f7cc1d 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -72,9 +72,6 @@ config MMU config NO_IOPORT_MAP def_bool y -config GENERIC_HWEIGHT - def_bool y - config HAVE_ARCH_TRANSPARENT_HUGEPAGE def_bool y depends on ARC_MMU_V4 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c230fb1e09ba..b47825767e3d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -182,10 +182,6 @@ config ARCH_HAS_BANDGAP config FIX_EARLYCON_MEM def_bool y if MMU -config GENERIC_HWEIGHT - bool - default y - config ARCH_MAY_HAVE_PC_FDC bool diff --git 
a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7cc3334aba29..98c3776ccf6b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -237,9 +237,6 @@ config ILLEGAL_POINTER_VALUE hex default 0xdead -config GENERIC_HWEIGHT - def_bool y - config ZONE_DMA32 def_bool y diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index 19b145ef7d92..c439d2f46af0 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -28,9 +28,6 @@ config MMU config FPU def_bool n -config GENERIC_HWEIGHT - def_bool y - config C6X_BIG_KERNEL bool "Build a big kernel" help diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index c0a49cbd3df0..14a9905e99a4 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -84,9 +84,6 @@ config CPU_NO_USER_BKPT instruction exception. In kernel we parse the *regs->pc to determine whether to send SIGTRAP or not. -config GENERIC_HWEIGHT - def_bool y - config MMU def_bool y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 4f5a1efab822..77ce104bb42e 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -27,9 +27,6 @@ config H8300 config CPU_BIG_ENDIAN def_bool y -config GENERIC_HWEIGHT - def_bool y - config NO_IOPORT_MAP def_bool y diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 7a01f4c5a4f6..fb2996063d5a 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -61,9 +61,6 @@ config MMU config GENERIC_IRQ_PROBE def_bool y -config GENERIC_HWEIGHT - def_bool y - menu "Machine selection" choice diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index fc44c046953e..c9eb106b1f4c 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -8,6 +8,7 @@ menu "Processor type and features" config IA64 bool + select ARCH_HAS_HWEIGHT select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ACPI if (!IA64_HP_SIM) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 1bf6abaea604..91b150b6572c 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -38,10 +38,6 @@ config ARCH_HAS_ILOG2_U32 config ARCH_HAS_ILOG2_U64 bool 
-config GENERIC_HWEIGHT - bool - default y - config TIME_LOW_RES bool default y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index d8907d6f969c..0185ac1f0268 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -71,9 +71,6 @@ config ARCH_HAS_ILOG2_U32 config ARCH_HAS_ILOG2_U64 def_bool n -config GENERIC_HWEIGHT - def_bool y -
[PATCH 09/11] lib: consolidate the GENERIC_CSUM symbol
Add one definition to lib/Kconfig and let the architectures select if it supported. Signed-off-by: Christoph Hellwig --- arch/arc/Kconfig| 4 +--- arch/arm64/Kconfig | 4 +--- arch/csky/Kconfig | 4 +--- arch/h8300/Kconfig | 4 +--- arch/hexagon/Kconfig| 4 +--- arch/m68k/Kconfig | 3 --- arch/microblaze/Kconfig | 4 +--- arch/mips/Kconfig | 5 + arch/nds32/Kconfig | 4 +--- arch/nios2/Kconfig | 4 +--- arch/openrisc/Kconfig | 6 +- arch/powerpc/Kconfig| 3 --- arch/riscv/Kconfig | 4 +--- arch/sh/Kconfig | 5 + arch/unicore32/Kconfig | 3 --- lib/Kconfig | 3 +++ 16 files changed, 15 insertions(+), 49 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index e965383c05d7..6476404b98b8 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -20,6 +20,7 @@ config ARC select GENERIC_CALIBRATE_DELAY select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) select GENERIC_CLOCKEVENTS + select GENERIC_CSUM select GENERIC_FIND_FIRST_BIT # for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP select GENERIC_IRQ_SHOW @@ -59,9 +60,6 @@ config ARCH_HAS_CACHE_LINE_SIZE config SCHED_OMIT_FRAME_POINTER def_bool y -config GENERIC_CSUM - def_bool y - config ARCH_DISCONTIGMEM_ENABLE def_bool n diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 913b2ca7ec22..7cc3334aba29 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -91,6 +91,7 @@ config ARM64 select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE + select GENERIC_CSUM select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_MULTI_HANDLER @@ -239,9 +240,6 @@ config ILLEGAL_POINTER_VALUE config GENERIC_HWEIGHT def_bool y -config GENERIC_CSUM -def_bool y - config ZONE_DMA32 def_bool y diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 4085ba807e0c..c0a49cbd3df0 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -12,6 +12,7 @@ config CSKY select HANDLE_DOMAIN_IRQ select DW_APB_TIMER_OF select GENERIC_CALIBRATE_DELAY + 
select GENERIC_CSUM select GENERIC_LIB_ASHLDI3 select GENERIC_LIB_ASHRDI3 select GENERIC_LIB_LSHRDI3 @@ -83,9 +84,6 @@ config CPU_NO_USER_BKPT instruction exception. In kernel we parse the *regs->pc to determine whether to send SIGTRAP or not. -config GENERIC_CSUM - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index ba33326e7c54..4f5a1efab822 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -9,6 +9,7 @@ config H8300 select GENERIC_CPU_DEVICES select MODULES_USE_ELF_RELA select GENERIC_CLOCKEVENTS + select GENERIC_CSUM select CLKDEV_LOOKUP select COMMON_CLK select ARCH_WANT_FRAME_POINTERS @@ -32,9 +33,6 @@ config GENERIC_HWEIGHT config NO_IOPORT_MAP def_bool y -config GENERIC_CSUM -def_bool y - config HZ int default 100 diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 5eb4f48506b6..7a01f4c5a4f6 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -19,6 +19,7 @@ config HEXAGON select HAVE_PERF_EVENTS # GENERIC_ALLOCATOR is used by dma_alloc_coherent() select GENERIC_ALLOCATOR + select GENERIC_CSUM select GENERIC_IRQ_SHOW select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK @@ -54,9 +55,6 @@ config EARLY_PRINTK config MMU def_bool y -config GENERIC_CSUM - def_bool y - # # Use the generic interrupt handling code in kernel/irq/: # diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index ed03da5430d9..1bf6abaea604 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -42,9 +42,6 @@ config GENERIC_HWEIGHT bool default y -config GENERIC_CSUM - bool - config TIME_LOW_RES bool default y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 65a44727a7a2..d8907d6f969c 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -16,6 +16,7 @@ config MICROBLAZE select GENERIC_CALIBRATE_DELAY select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES + select GENERIC_CSUM select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW @@ -73,9 +74,6 @@ 
config ARCH_HAS_ILOG2_U64 config GENERIC_HWEIGHT def_bool y -config GENERIC_CSUM - def_bool y - source "arch/microblaze/Kconfig.platform" menu "Processor type and features" diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 530eaf950744..bd0c9be7e7cf 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -23,6 +23,7 @@ config MIPS
[PATCH 08/11] lib: consolidate the GENERIC_BUG symbol
And just let the architectures that want it select the symbol. Same for GENERIC_BUG_RELATIVE_POINTERS. Signed-off-by: Christoph Hellwig --- arch/arm/Kconfig | 5 + arch/arm64/Kconfig | 10 ++ arch/c6x/Kconfig | 5 + arch/hexagon/Kconfig | 5 + arch/parisc/Kconfig | 6 +- arch/powerpc/Kconfig | 6 +- arch/riscv/Kconfig | 10 ++ arch/s390/Kconfig| 8 ++-- arch/sh/Kconfig | 5 + arch/x86/Kconfig | 10 ++ lib/Kconfig | 6 ++ 11 files changed, 20 insertions(+), 56 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 96780ab64a2e..c230fb1e09ba 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -36,6 +36,7 @@ config ARM select GENERIC_ALLOCATOR select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI + select GENERIC_BUG if BUG select GENERIC_CALIBRATE_DELAY select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CPU_AUTOPROBE @@ -256,10 +257,6 @@ config PHYS_OFFSET Please provide the physical address corresponding to the location of main memory in your system. 
-config GENERIC_BUG - def_bool y - depends on BUG - config PGTABLE_LEVELS int default 3 if ARM_LPAE diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c39dac831f08..913b2ca7ec22 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -85,6 +85,8 @@ config ARM64 select FRAME_POINTER select GENERIC_ALLOCATOR select GENERIC_ARCH_TOPOLOGY + select GENERIC_BUG if BUG + select GENERIC_BUG_RELATIVE_POINTERS select GENERIC_CALIBRATE_DELAY select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST @@ -234,14 +236,6 @@ config ILLEGAL_POINTER_VALUE hex default 0xdead -config GENERIC_BUG - def_bool y - depends on BUG - -config GENERIC_BUG_RELATIVE_POINTERS - def_bool y - depends on GENERIC_BUG - config GENERIC_HWEIGHT def_bool y diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index d5f382830f49..19b145ef7d92 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -10,6 +10,7 @@ config C6X select ARCH_HAS_SYNC_DMA_FOR_DEVICE select CLKDEV_LOOKUP select GENERIC_ATOMIC64 + select GENERIC_BUG if BUG select GENERIC_CALIBRATE_DELAY select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK @@ -30,10 +31,6 @@ config FPU config GENERIC_HWEIGHT def_bool y -config GENERIC_BUG - def_bool y - depends on BUG - config C6X_BIG_KERNEL bool "Build a big kernel" help diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 425217c98a77..5eb4f48506b6 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -14,6 +14,7 @@ config HEXAGON # select GPIOLIB # select HAVE_CLK # select GENERIC_PENDING_IRQ if SMP + select GENERIC_BUG if BUG select GENERIC_ATOMIC64 select HAVE_PERF_EVENTS # GENERIC_ALLOCATOR is used by dma_alloc_coherent() @@ -65,10 +66,6 @@ config GENERIC_IRQ_PROBE config GENERIC_HWEIGHT def_bool y -config GENERIC_BUG - def_bool y - depends on BUG - menu "Machine selection" choice diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index fcbc67b6b830..42282b8e086d 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -27,6 +27,7 @@ config PARISC select 
HAVE_KERNEL_LZO select HAVE_KERNEL_XZ select GENERIC_ATOMIC64 if !64BIT + select GENERIC_BUG if BUG select GENERIC_CALIBRATE_DELAY select GENERIC_IRQ_PROBE select GENERIC_PCI_IOMAP @@ -85,11 +86,6 @@ config ARCH_HAS_ILOG2_U64 bool default n -config GENERIC_BUG - bool - default y - depends on BUG - config GENERIC_HWEIGHT bool default y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8b9f3639555f..1684017fa496 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -142,6 +142,7 @@ config PPC select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT select GENERIC_ATOMIC64 if PPC32 + select GENERIC_BUG if BUG select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCASTif SMP select GENERIC_CMOS_UPDATE @@ -283,11 +284,6 @@ config AUDIT_ARCH bool default y -config GENERIC_BUG - bool - default y - depends on BUG - config SYS_SUPPORTS_APM_EMULATION default y if PMAC_APM_EMU bool diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 732614eb3683..c410ed896567 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -19,6 +19,8 @@ config RISCV select ARCH_WANT_FRAME_POINTERS select CLONE_BACKWARDS select COMMON_CLK +
[PATCH 07/11] init: consolidate the GENERIC_CALIBRATE_DELAY symbol
Add one definition to init/Kconfig and let the architectures select it if supported. The only complication is xtensa, where it is a user visible option - we introduce a xtensa-specific symbol instead to work around this. Signed-off-by: Christoph Hellwig --- arch/alpha/Kconfig | 5 + arch/arc/Kconfig| 4 +--- arch/arm/Kconfig| 5 + arch/arm64/Kconfig | 4 +--- arch/c6x/Kconfig| 4 +--- arch/csky/Kconfig | 4 +--- arch/ia64/Kconfig | 5 + arch/m68k/Kconfig | 5 + arch/microblaze/Kconfig | 4 +--- arch/mips/Kconfig | 5 + arch/nds32/Kconfig | 4 +--- arch/nios2/Kconfig | 4 +--- arch/parisc/Kconfig | 5 + arch/riscv/Kconfig | 4 +--- arch/sh/Kconfig | 3 --- arch/sparc/Kconfig | 5 + arch/um/Kconfig | 5 + arch/unicore32/Kconfig | 4 +--- arch/x86/Kconfig| 4 +--- arch/xtensa/Kconfig | 3 ++- init/Kconfig| 3 +++ 21 files changed, 23 insertions(+), 66 deletions(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 474202d89b25..65b5514e5a7f 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -25,6 +25,7 @@ config ALPHA select ARCH_WANT_IPC_PARSE_VERSION select ARCH_HAVE_NMI_SAFE_CMPXCHG select AUDIT_ARCH + select GENERIC_CALIBRATE_DELAY select GENERIC_CLOCKEVENTS select GENERIC_CPU_VULNERABILITIES select GENERIC_SMP_IDLE_THREAD @@ -58,10 +59,6 @@ config ARCH_HAS_ILOG2_U64 bool default n -config GENERIC_CALIBRATE_DELAY - bool - default y - config ZONE_DMA bool default y diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index b1d6f297e448..e965383c05d7 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -17,6 +17,7 @@ config ARC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK + select GENERIC_CALIBRATE_DELAY select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) select GENERIC_CLOCKEVENTS select GENERIC_FIND_FIRST_BIT @@ -73,9 +74,6 @@ config MMU config NO_IOPORT_MAP def_bool y -config GENERIC_CALIBRATE_DELAY - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 
d6e7713a71ae..96780ab64a2e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -36,6 +36,7 @@ config ARM select GENERIC_ALLOCATOR select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI + select GENERIC_CALIBRATE_DELAY select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP @@ -184,10 +185,6 @@ config GENERIC_HWEIGHT bool default y -config GENERIC_CALIBRATE_DELAY - bool - default y - config ARCH_MAY_HAVE_PC_FDC bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f3d3e48aff26..c39dac831f08 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -85,6 +85,7 @@ config ARM64 select FRAME_POINTER select GENERIC_ALLOCATOR select GENERIC_ARCH_TOPOLOGY + select GENERIC_CALIBRATE_DELAY select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE @@ -247,9 +248,6 @@ config GENERIC_HWEIGHT config GENERIC_CSUM def_bool y -config GENERIC_CALIBRATE_DELAY - def_bool y - config ZONE_DMA32 def_bool y diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index f11465554ecf..d5f382830f49 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -10,6 +10,7 @@ config C6X select ARCH_HAS_SYNC_DMA_FOR_DEVICE select CLKDEV_LOOKUP select GENERIC_ATOMIC64 + select GENERIC_CALIBRATE_DELAY select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK select SPARSE_IRQ @@ -26,9 +27,6 @@ config MMU config FPU def_bool n -config GENERIC_CALIBRATE_DELAY - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 3c3de7ac95bf..4085ba807e0c 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -11,6 +11,7 @@ config CSKY select IRQ_DOMAIN select HANDLE_DOMAIN_IRQ select DW_APB_TIMER_OF + select GENERIC_CALIBRATE_DELAY select GENERIC_LIB_ASHLDI3 select GENERIC_LIB_ASHRDI3 select GENERIC_LIB_LSHRDI3 @@ -82,9 +83,6 @@ config CPU_NO_USER_BKPT instruction exception. 
In kernel we parse the *regs->pc to determine whether to send SIGTRAP or not. -config GENERIC_CALIBRATE_DELAY - def_bool y - config GENERIC_CSUM def_bool y diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 8c14b669d194..fc44c046953e 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -34,6 +34,7 @@ config IA64 select
[PATCH 06/11] lockdep: consolidate the LOCKDEP_SUPPORT symbol
Add one definition to lib/Kconfig.debug and let the architectures select if it supported. Signed-off-by: Christoph Hellwig --- arch/arc/Kconfig| 4 +--- arch/arm/Kconfig| 5 + arch/arm64/Kconfig | 4 +--- arch/hexagon/Kconfig| 4 +--- arch/microblaze/Kconfig | 4 +--- arch/mips/Kconfig | 5 + arch/openrisc/Kconfig | 4 +--- arch/powerpc/Kconfig| 5 + arch/s390/Kconfig | 4 +--- arch/sh/Kconfig | 4 +--- arch/sparc/Kconfig | 5 + arch/um/Kconfig | 5 + arch/unicore32/Kconfig | 4 +--- arch/x86/Kconfig| 4 +--- arch/xtensa/Kconfig | 4 +--- lib/Kconfig.debug | 3 +++ 16 files changed, 18 insertions(+), 50 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 065fbd55dcc4..b1d6f297e448 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -42,6 +42,7 @@ config ARC select HANDLE_DOMAIN_IRQ select IRQ_DOMAIN select MODULES_USE_ELF_RELA + select LOCKDEP_SUPPORT select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM @@ -54,9 +55,6 @@ config ARC config ARCH_HAS_CACHE_LINE_SIZE def_bool y -config LOCKDEP_SUPPORT - def_bool y - config SCHED_OMIT_FRAME_POINTER def_bool y diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1995e1b24506..d6e7713a71ae 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -98,6 +98,7 @@ config ARM select HAVE_UID16 select HAVE_VIRT_CPU_ACCOUNTING_GEN select IRQ_FORCED_THREADING + select LOCKDEP_SUPPORT select MODULES_USE_ELF_REL select NEED_DMA_MAP_STATE select OF_EARLY_FLATTREE if OF @@ -167,10 +168,6 @@ config NO_IOPORT_MAP config SBUS bool -config LOCKDEP_SUPPORT - bool - default y - config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ecbe481ce064..f3d3e48aff26 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -157,6 +157,7 @@ config ARM64 select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING + select LOCKDEP_SUPPORT select MODULES_USE_ELF_RELA select MULTI_IRQ_HANDLER select NEED_DMA_MAP_STATE @@ -232,9 +233,6 @@ config ILLEGAL_POINTER_VALUE hex default 0xdead 
-config LOCKDEP_SUPPORT - def_bool y - config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index b6c3111ec5f9..425217c98a77 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -21,6 +21,7 @@ config HEXAGON select GENERIC_IRQ_SHOW select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select LOCKDEP_SUPPORT select ARCH_DISCARD_MEMBLOCK select NEED_SG_DMA_LENGTH select NO_IOPORT_MAP @@ -46,9 +47,6 @@ config HEXAGON_PHYS_OFFSET config FRAME_POINTER def_bool y -config LOCKDEP_SUPPORT - def_bool y - config EARLY_PRINTK def_bool y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 238f8b410331..1989ba1d1798 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -31,6 +31,7 @@ config MICROBLAZE select HAVE_OPROFILE select HAVE_PCI select IRQ_DOMAIN + select LOCKDEP_SUPPORT select XILINX_INTC select MODULES_USE_ELF_RELA select OF @@ -77,9 +78,6 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_CSUM def_bool y -config LOCKDEP_SUPPORT - def_bool y - source "arch/microblaze/Kconfig.platform" menu "Processor type and features" diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d1c89635a459..88792685687c 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -76,6 +76,7 @@ config MIPS select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP select IRQ_FORCED_THREADING select ISA if EISA + select LOCKDEP_SUPPORT select MODULES_USE_ELF_RELA if MODULES && 64BIT select MODULES_USE_ELF_REL if MODULES select PERF_USE_VMALLOC @@ -3051,10 +3052,6 @@ endchoice endmenu -config LOCKDEP_SUPPORT - bool - default y - config HAVE_LATENCYTOP_SUPPORT bool default y diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 514787c0c469..6cb7632fa5b0 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -28,6 +28,7 @@ config OPENRISC select GENERIC_SMP_IDLE_THREAD select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW + select LOCKDEP_SUPPORT select OR1K_PIC select 
CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 select ARCH_USE_QUEUED_SPINLOCKS @@ -55,9 +56,6 @@ config NO_IOPORT_MAP config GENERIC_CSUM def_bool y -config LOCKDEP_SUPPORT - def_bool y - menu "Processor type and features" choice diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
[PATCH 05/11] tracing: consolidate the TRACE_IRQFLAGS_SUPPORT symbol
Add one definition to kernel/trace/Kconfig and let the architectures select if it supported. Signed-off-by: Christoph Hellwig --- arch/arc/Kconfig | 4 +--- arch/arm/Kconfig | 5 + arch/arm64/Kconfig| 4 +--- arch/csky/Kconfig | 4 +--- arch/hexagon/Kconfig | 4 +--- arch/microblaze/Kconfig | 1 + arch/microblaze/Kconfig.debug | 2 -- arch/mips/Kconfig | 1 + arch/mips/Kconfig.debug | 4 arch/nds32/Kconfig| 4 +--- arch/nios2/Kconfig| 4 +--- arch/nios2/Kconfig.debug | 3 --- arch/openrisc/Kconfig | 4 +--- arch/parisc/Kconfig | 1 + arch/parisc/Kconfig.debug | 2 -- arch/powerpc/Kconfig | 5 + arch/riscv/Kconfig| 4 +--- arch/s390/Kconfig | 1 + arch/s390/Kconfig.debug | 3 --- arch/sh/Kconfig | 1 + arch/sh/Kconfig.debug | 3 --- arch/sparc/Kconfig| 1 + arch/sparc/Kconfig.debug | 4 arch/um/Kconfig | 5 + arch/x86/Kconfig | 1 + arch/x86/Kconfig.debug| 3 --- arch/xtensa/Kconfig | 4 +--- kernel/trace/Kconfig | 3 +++ 28 files changed, 22 insertions(+), 63 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index e8e3776fc5fa..065fbd55dcc4 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -49,13 +49,11 @@ config ARC select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING select STACKTRACE_SUPPORT select STACKTRACE + select TRACE_IRQFLAGS_SUPPORT config ARCH_HAS_CACHE_LINE_SIZE def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config LOCKDEP_SUPPORT def_bool y diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 68a891f3ffa0..1995e1b24506 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -111,6 +111,7 @@ config ARM select RWSEM_XCHGADD_ALGORITHM select STACKTRACE_SUPPORT select SYS_SUPPORTS_APM_EMULATION + select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M # Above selects are sorted alphabetically; please add new ones # according to that. Thanks. 
help @@ -170,10 +171,6 @@ config LOCKDEP_SUPPORT bool default y -config TRACE_IRQFLAGS_SUPPORT - bool - default !CPU_V7M - config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a6a0bb868369..ecbe481ce064 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -176,6 +176,7 @@ config ARM64 select SWIOTLB select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK + select TRACE_IRQFLAGS_SUPPORT help ARM 64-bit (AArch64) Linux support. @@ -234,9 +235,6 @@ config ILLEGAL_POINTER_VALUE config LOCKDEP_SUPPORT def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 90279a11fcf7..3c3de7ac95bf 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -46,6 +46,7 @@ config CSKY select PERF_USE_VMALLOC if CPU_CK610 select RTC_LIB select TIMER_OF + select TRACE_IRQFLAGS_SUPPORT select USB_ARCH_HAS_EHCI select USB_ARCH_HAS_OHCI @@ -99,9 +100,6 @@ config STACKTRACE_SUPPORT config TIME_LOW_RES def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config CPU_TLB_SIZE int default "128" if (CPU_CK610 || CPU_CK807 || CPU_CK810) diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 615693b62ea0..b6c3111ec5f9 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -33,6 +33,7 @@ config HEXAGON select MODULES_USE_ELF_RELA select GENERIC_CPU_DEVICES select RWSEM_XCHGADD_ALGORITHM + select TRACE_IRQFLAGS_SUPPORT ---help--- Qualcomm Hexagon is a processor architecture designed for high performance and low power across a wide variety of applications. 
@@ -54,9 +55,6 @@ config EARLY_PRINTK config MMU def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config GENERIC_CSUM def_bool y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index ecccf8651caa..238f8b410331 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -38,6 +38,7 @@ config MICROBLAZE select PCI_DOMAINS_GENERIC if PCI select PCI_SYSCALL if PCI select STACKTRACE_SUPPORT + select TRACE_IRQFLAGS_SUPPORT select TRACING_SUPPORT select VIRT_TO_BUS select CPU_NO_EFFICIENT_FFS diff --git a/arch/microblaze/Kconfig.debug b/arch/microblaze/Kconfig.debug index dc2e3c45e8a2..617df4f48a5d 100644 --- a/arch/microblaze/Kconfig.debug +++ b/arch/microblaze/Kconfig.debug @@ -1,5 +1,3 @@ # For a description of the syntax of this configuration file, # see
[PATCH 04/11] tracing: consolidate the STACKTRACE_SUPPORT symbol
Add one definition to kernel/trace/Kconfig and let the architectures select if it supported. Signed-off-by: Christoph Hellwig --- arch/arc/Kconfig| 6 ++ arch/arm/Kconfig| 5 + arch/arm64/Kconfig | 4 +--- arch/hexagon/Kconfig| 5 + arch/ia64/Kconfig | 4 +--- arch/microblaze/Kconfig | 4 +--- arch/mips/Kconfig | 5 + arch/nds32/Kconfig | 4 +--- arch/openrisc/Kconfig | 4 +--- arch/parisc/Kconfig | 4 +--- arch/powerpc/Kconfig| 5 + arch/riscv/Kconfig | 4 +--- arch/s390/Kconfig | 4 +--- arch/sh/Kconfig | 4 +--- arch/sparc/Kconfig | 5 + arch/um/Kconfig | 7 ++- arch/unicore32/Kconfig | 4 +--- arch/x86/Kconfig| 4 +--- arch/xtensa/Kconfig | 4 +--- kernel/trace/Kconfig| 3 +++ 20 files changed, 24 insertions(+), 65 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index c0dd229af534..e8e3776fc5fa 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -47,6 +47,8 @@ config ARC select OF_RESERVED_MEM select PCI_SYSCALL if PCI select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING + select STACKTRACE_SUPPORT + select STACKTRACE config ARCH_HAS_CACHE_LINE_SIZE def_bool y @@ -81,10 +83,6 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_HWEIGHT def_bool y -config STACKTRACE_SUPPORT - def_bool y - select STACKTRACE - config HAVE_ARCH_TRANSPARENT_HUGEPAGE def_bool y depends on ARC_MMU_V4 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1ed4c0560b50..68a891f3ffa0 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -109,6 +109,7 @@ config ARM select REFCOUNT_FULL select RTC_LIB select RWSEM_XCHGADD_ALGORITHM + select STACKTRACE_SUPPORT select SYS_SUPPORTS_APM_EMULATION # Above selects are sorted alphabetically; please add new ones # according to that. Thanks. 
@@ -165,10 +166,6 @@ config NO_IOPORT_MAP config SBUS bool -config STACKTRACE_SUPPORT - bool - default y - config LOCKDEP_SUPPORT bool default y diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2f8da4e18a1e..a6a0bb868369 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -172,6 +172,7 @@ config ARM64 select REFCOUNT_FULL select RWSEM_XCHGADD_ALGORITHM select SPARSE_IRQ + select STACKTRACE_SUPPORT select SWIOTLB select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK @@ -226,9 +227,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MAX config NO_IOPORT_MAP def_bool y if !PCI -config STACKTRACE_SUPPORT - def_bool y - config ILLEGAL_POINTER_VALUE hex default 0xdead diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 49f364ea18d4..615693b62ea0 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -26,6 +26,7 @@ config HEXAGON select NO_IOPORT_MAP select GENERIC_IOMAP select GENERIC_SMP_IDLE_THREAD + select STACKTRACE select STACKTRACE_SUPPORT select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST @@ -68,10 +69,6 @@ config GENERIC_IRQ_PROBE config GENERIC_HWEIGHT def_bool y -config STACKTRACE_SUPPORT - def_bool y - select STACKTRACE - config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 99a629f05de4..8c14b669d194 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -54,6 +54,7 @@ config IA64 select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH select RWSEM_XCHGADD_ALGORITHM + select STACKTRACE_SUPPORT default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -78,9 +79,6 @@ config MMU bool default y -config STACKTRACE_SUPPORT - def_bool y - config GENERIC_LOCKBREAK def_bool n diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index d459c70d9a66..ecccf8651caa 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -37,6 +37,7 @@ config MICROBLAZE select OF_EARLY_FLATTREE select PCI_DOMAINS_GENERIC if PCI select PCI_SYSCALL if PCI + select 
STACKTRACE_SUPPORT select TRACING_SUPPORT select VIRT_TO_BUS select CPU_NO_EFFICIENT_FFS @@ -75,9 +76,6 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_CSUM def_bool y -config STACKTRACE_SUPPORT - def_bool y - config LOCKDEP_SUPPORT def_bool y diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 6ad1882a8db9..0645e7b96493 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -80,6 +80,7 @@ config MIPS select MODULES_USE_ELF_REL if MODULES select PERF_USE_VMALLOC select RTC_LIB + select STACKTRACE_SUPPORT select SYSCTL_EXCEPTION_TRACE select VIRT_TO_BUS
[PATCH 03/11] kernel/locks: consolidate RWSEM_GENERIC_* options
Introduce one central definition of RWSEM_XCHGADD_ALGORITHM and RWSEM_GENERIC_SPINLOCK in kernel/Kconfig.locks and let architectures select RWSEM_XCHGADD_ALGORITHM if they want it, otherwise default to the spinlock version. Signed-off-by: Christoph Hellwig --- arch/alpha/Kconfig | 8 +--- arch/arc/Kconfig| 3 --- arch/arm/Kconfig| 5 + arch/arm64/Kconfig | 4 +--- arch/c6x/Kconfig| 3 --- arch/csky/Kconfig | 3 --- arch/h8300/Kconfig | 3 --- arch/hexagon/Kconfig| 7 +-- arch/ia64/Kconfig | 5 + arch/m68k/Kconfig | 7 --- arch/microblaze/Kconfig | 6 -- arch/mips/Kconfig | 7 --- arch/nds32/Kconfig | 3 --- arch/nios2/Kconfig | 3 --- arch/openrisc/Kconfig | 6 -- arch/parisc/Kconfig | 6 -- arch/powerpc/Kconfig| 8 +--- arch/riscv/Kconfig | 3 --- arch/s390/Kconfig | 7 +-- arch/sh/Kconfig | 6 -- arch/sparc/Kconfig | 9 + arch/unicore32/Kconfig | 6 -- arch/x86/Kconfig| 4 +--- arch/x86/um/Kconfig | 7 +-- arch/xtensa/Kconfig | 4 +--- kernel/Kconfig.locks| 7 +++ 26 files changed, 18 insertions(+), 122 deletions(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 584a6e114853..474202d89b25 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -35,6 +35,7 @@ config ALPHA select MODULES_USE_ELF_RELA select ODD_RT_SIGACTION select OLD_SIGSUSPEND + select RWSEM_XCHGADD_ALGORITHM select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67 help The Alpha is a 64-bit general-purpose processor designed and @@ -49,13 +50,6 @@ config MMU bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config ARCH_HAS_ILOG2_U32 bool default n diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 376366a7db81..c0dd229af534 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -63,9 +63,6 @@ config SCHED_OMIT_FRAME_POINTER config GENERIC_CSUM def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool y - config ARCH_DISCONTIGMEM_ENABLE def_bool n diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 664e918e2624..1ed4c0560b50 100644 --- a/arch/arm/Kconfig +++ 
b/arch/arm/Kconfig @@ -108,6 +108,7 @@ config ARM select PERF_USE_VMALLOC select REFCOUNT_FULL select RTC_LIB + select RWSEM_XCHGADD_ALGORITHM select SYS_SUPPORTS_APM_EMULATION # Above selects are sorted alphabetically; please add new ones # according to that. Thanks. @@ -176,10 +177,6 @@ config TRACE_IRQFLAGS_SUPPORT bool default !CPU_V7M -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a4168d366127..2f8da4e18a1e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -170,6 +170,7 @@ config ARM64 select POWER_RESET select POWER_SUPPLY select REFCOUNT_FULL + select RWSEM_XCHGADD_ALGORITHM select SPARSE_IRQ select SWIOTLB select SYSCTL_EXCEPTION_TRACE @@ -238,9 +239,6 @@ config LOCKDEP_SUPPORT config TRACE_IRQFLAGS_SUPPORT def_bool y -config RWSEM_XCHGADD_ALGORITHM - def_bool y - config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index 456e154674d1..f11465554ecf 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -26,9 +26,6 @@ config MMU config FPU def_bool n -config RWSEM_GENERIC_SPINLOCK - def_bool y - config GENERIC_CALIBRATE_DELAY def_bool y diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 398113c845f5..90279a11fcf7 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -93,9 +93,6 @@ config GENERIC_HWEIGHT config MMU def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool y - config STACKTRACE_SUPPORT def_bool y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 6472a0685470..ba33326e7c54 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -26,9 +26,6 @@ config H8300 config CPU_BIG_ENDIAN def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index fb2fbfcfc532..49f364ea18d4 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -31,6 +31,7 @@ config HEXAGON select 
GENERIC_CLOCKEVENTS_BROADCAST select MODULES_USE_ELF_RELA select GENERIC_CPU_DEVICES + select RWSEM_XCHGADD_ALGORITHM ---help--- Qualcomm Hexagon is a processor architecture designed for high performance and low power across a wide variety of applications. @@ -64,12 +65,6 @@ config GENERIC_CSUM config GENERIC_IRQ_PROBE
[PATCH 00/11] consolidate a few more arch support config options
Hi all, this series moves various config options that are defined in multiple arch Kconfig files into common files, usually close to the code supporting such features.
[PATCH 01/11] powerpc: remove dead ifdefs in &lt;asm/checksum.h&gt;
__KERNEL__ is never not defined for non-uapi headers, and GENERIC_CSUM isn't ever set for powerpc either. Signed-off-by: Christoph Hellwig --- arch/powerpc/include/asm/checksum.h | 8 +--- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index a78a57e5058d..37c309500260 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -1,6 +1,5 @@ #ifndef _ASM_POWERPC_CHECKSUM_H #define _ASM_POWERPC_CHECKSUM_H -#ifdef __KERNEL__ /* * This program is free software; you can redistribute it and/or @@ -9,9 +8,6 @@ * 2 of the License, or (at your option) any later version. */ -#ifdef CONFIG_GENERIC_CSUM -#include -#else #include #include /* @@ -217,6 +213,4 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, __u32 len, __u8 proto, __wsum sum); -#endif -#endif /* __KERNEL__ */ -#endif +#endif /* _ASM_POWERPC_CHECKSUM_H */ -- 2.20.1
[PATCH 02/11] riscv: remove the HAVE_KPROBES option
HAVE_KPROBES is defined genericly in arch/Kconfig and architectures should just select it if supported. Signed-off-by: Christoph Hellwig --- arch/riscv/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 515fc3cc9687..b60f4e3e36f4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -94,9 +94,6 @@ config PGTABLE_LEVELS default 3 if 64BIT default 2 -config HAVE_KPROBES - def_bool n - menu "Platform type" choice -- 2.20.1
Re: [PATCH v3 2/2] mm: be more verbose about zonelist initialization
On Wed 13-02-19 08:14:50, Dave Hansen wrote: > On 2/13/19 1:43 AM, Michal Hocko wrote: > > > > We have seen several bugs where zonelists have not been initialized > > properly and it is not really straightforward to track those bugs down. > > One way to help a bit at least is to dump zonelists of each node when > > they are (re)initialized. > > Were you thinking of boot-time bugs and crashes, or just stuff going > wonky after boot? Mostly boot time. I haven't seen hotplug related bugs in this direction. All the issues I have seen so far is that we forget a node altogether and it ends up with no zonelists at all. But who knows maybe we have some hidden bugs where zonelists is initialized only partially for some reason and there is no real way to find out. > We don't have the zonelists dumped in /proc anywhere, do we? Would that > help? I would prefer to not export such an implementation detail into proc -- Michal Hocko SUSE Labs
Re: [PATCH v3 2/2] mm: be more verbose about zonelist initialization
On 2/13/19 1:43 AM, Michal Hocko wrote: > > We have seen several bugs where zonelists have not been initialized > properly and it is not really straightforward to track those bugs down. > One way to help a bit at least is to dump zonelists of each node when > they are (re)initialized. Were you thinking of boot-time bugs and crashes, or just stuff going wonky after boot? We don't have the zonelists dumped in /proc anywhere, do we? Would that help?
[PATCH v3 2/2] powerpc/8xx: Map 32Mb of RAM at init.
At the time being, initial MMU setup allows 24 Mbytes of DATA and 8 Mbytes of code. Some debug setup like CONFIG_KASAN generate huge kernels with text size over the 8M limit and data over the 24 Mbytes limit. Here is an 8xx kernel compiled with CONFIG_KASAN_INLINE for one of my boards: [root@po16846vm linux-powerpc]# size -x vmlinux textdata bss dec hex filename 0x111019c 0x41b0d40x490de02698452819bc050 vmlinux This patch maps up to 32 Mbytes code based on _einittext symbol and allows 32 Mbytes of memory instead of 24. Signed-off-by: Christophe Leroy --- v3: Maps 32M of both data and text. v2: Using IS_ENABLED() instead of #ifdef in 8xx_mmu.c arch/powerpc/kernel/head_8xx.S | 51 +- arch/powerpc/mm/8xx_mmu.c | 7 -- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 20cc816b3508..fe2857ef0309 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -337,8 +337,8 @@ InstructionTLBMiss: rlwinm r10, r10, 16, 0xfff8 cmpli cr0, r10, PAGE_OFFSET@h #ifndef CONFIG_PIN_TLB_TEXT - /* It is assumed that kernel code fits into the first 8M page */ -0: cmpli cr7, r10, (PAGE_OFFSET + 0x080)@h + /* It is assumed that kernel code fits into the first 32M */ +0: cmpli cr7, r10, (PAGE_OFFSET + 0x200)@h patch_site 0b, patch__itlbmiss_linmem_top #endif #endif @@ -434,7 +434,7 @@ DataStoreTLBMiss: #ifndef CONFIG_PIN_TLB_IMMR cmpli cr6, r10, VIRT_IMMR_BASE@h #endif -0: cmpli cr7, r10, (PAGE_OFFSET + 0x180)@h +0: cmpli cr7, r10, (PAGE_OFFSET + 0x200)@h patch_site 0b, patch__dtlbmiss_linmem_top mfspr r10, SPRN_M_TWB /* Get level 1 table */ @@ -886,28 +886,11 @@ initial_mmu: mtspr SPRN_MD_CTR, r10/* remove PINNED DTLB entries */ tlbia /* Invalidate all TLB entries */ -#ifdef CONFIG_PIN_TLB_TEXT - lis r8, MI_RSV4I@h - ori r8, r8, 0x1c00 - - mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ -#endif - #ifdef CONFIG_PIN_TLB_DATA orisr10, r10, MD_RSV4I@h mtspr SPRN_MD_CTR, r10/* Set 
data TLB control */ #endif - /* Now map the lower 8 Meg into the ITLB. */ - lis r8, KERNELBASE@h/* Create vaddr for TLB */ - ori r8, r8, MI_EVALID /* Mark it valid */ - mtspr SPRN_MI_EPN, r8 - li r8, MI_PS8MEG /* Set 8M byte page */ - ori r8, r8, MI_SVALID /* Make it valid */ - mtspr SPRN_MI_TWC, r8 - li r8, MI_BOOTINIT /* Create RPN for address 0 */ - mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ - lis r8, MI_APG_INIT@h /* Set protection modes */ ori r8, r8, MI_APG_INIT@l mtspr SPRN_MI_AP, r8 @@ -937,6 +920,34 @@ initial_mmu: mtspr SPRN_MD_RPN, r8 #endif + /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */ +#ifdef CONFIG_PIN_TLB_TEXT + lis r8, MI_RSV4I@h + ori r8, r8, 0x1c00 +#endif + li r9, 4 /* up to 4 pages of 8M */ + mtctr r9 + lis r9, KERNELBASE@h/* Create vaddr for TLB */ + li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */ + li r11, MI_BOOTINIT/* Create RPN for address 0 */ + lis r12, _einittext@h + ori r12, r12, _einittext@l +1: +#ifdef CONFIG_PIN_TLB_TEXT + mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ + addir8, r8, 0x100 +#endif + + ori r0, r9, MI_EVALID /* Mark it valid */ + mtspr SPRN_MI_EPN, r0 + mtspr SPRN_MI_TWC, r10 + mtspr SPRN_MI_RPN, r11/* Store TLB entry */ + addis r9, r9, 0x80 + addis r11, r11, 0x80 + + cmplcr0, r9, r12 + bdnzf gt, 1b + /* Since the cache is enabled according to the information we * just loaded into the TLB, invalidate and enable the caches here. * We should probably check/set other modeslater. 
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 92b677faea8c..50c8cd8d3cb9 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -112,6 +112,9 @@ unsigned long __init mmu_mapin_ram(unsigned long top) mmu_patch_cmp_limit(__itlbmiss_linmem_top, 0); } else { mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); + if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_patch_cmp_limit(__itlbmiss_linmem_top, + _ALIGN(__pa(_einittext), 8 << 20)); } mmu_patch_cmp_limit(__dtlbmiss_linmem_top, mapped); @@ -140,8 +143,8 @@ void __init
[PATCH v3 1/2] powerpc/8xx: replace most #ifdef by IS_ENABLED() in 8xx_mmu.c
This patch replaces most #ifdef mess by IS_ENABLED() in 8xx_mmu.c This has the advantage of allowing syntax verification at compile time regardless of selected options. Signed-off-by: Christophe Leroy --- v3: no change v2: left CONFIG_BDI_SWITCH change aside as it goes away in another patch arch/powerpc/mm/8xx_mmu.c | 44 +++- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index bfa503cff351..92b677faea8c 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -66,26 +66,22 @@ unsigned long p_block_mapped(phys_addr_t pa) void __init MMU_init_hw(void) { /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ -#ifdef CONFIG_PIN_TLB_DATA - unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe00; - unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY; -#ifdef CONFIG_PIN_TLB_IMMR - int i = 29; -#else - int i = 28; -#endif - unsigned long addr = 0; - unsigned long mem = total_lowmem; - - for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { - mtspr(SPRN_MD_CTR, ctr | (i << 8)); - mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); - mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); - mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); - addr += LARGE_PAGE_SIZE_8M; - mem -= LARGE_PAGE_SIZE_8M; + if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) { + unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe00; + unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY; + int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 
29 : 28; + unsigned long addr = 0; + unsigned long mem = total_lowmem; + + for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { + mtspr(SPRN_MD_CTR, ctr | (i << 8)); + mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); + mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); + mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); + addr += LARGE_PAGE_SIZE_8M; + mem -= LARGE_PAGE_SIZE_8M; + } } -#endif } static void __init mmu_mapin_immr(void) @@ -110,12 +106,10 @@ unsigned long __init mmu_mapin_ram(unsigned long top) if (__map_without_ltlbs) { mapped = 0; mmu_mapin_immr(); -#ifndef CONFIG_PIN_TLB_IMMR - patch_instruction_site(__dtlbmiss_immr_jmp, PPC_INST_NOP); -#endif -#ifndef CONFIG_PIN_TLB_TEXT - mmu_patch_cmp_limit(__itlbmiss_linmem_top, 0); -#endif + if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR)) + patch_instruction_site(__dtlbmiss_immr_jmp, PPC_INST_NOP); + if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_patch_cmp_limit(__itlbmiss_linmem_top, 0); } else { mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); } -- 2.13.3
Re: [PATCH v2 2/2] locking/rwsem: Optimize down_read_trylock()
On 02/13/2019 02:45 AM, Ingo Molnar wrote: > * Waiman Long wrote: > >> I looked at the assembly code in arch/x86/include/asm/rwsem.h. For both >> trylocks (read & write), the count is read first before attempting to >> lock it. We did the same for all trylock functions in other locks. >> Depending on how the trylock is used and how contended the lock is, it >> may help or hurt performance. Changing down_read_trylock to do an >> unconditional cmpxchg will change the performance profile of existing >> code. So I would prefer keeping the current code. >> >> I do notice now that the generic down_write_trylock() code is doing an >> unconditional compxchg. So I wonder if we should change it to read the >> lock first like other trylocks or just leave it as it is. > No, I think we should instead move the other trylocks to the > try-for-ownership model as well, like Linus suggested. > > That's the general assumption we make in locking primitives, that we > optimize for the common, expected case - which would be that the trylock > succeeds, and I don't see why trylock primitives should be different. > > In fact I can see more ways for read-for-sharing to perform suboptimally > on larger systems. I don't mind changing to the try-for-ownership model for rwsem and mutex. I do have some concern to do that for spinlock. Some of the lock slowpath code do optimistic trylock. Making them unconditional cmpxchg will impact lock contention performance. I will update this rwsem patch to make the change while I am working on it. For other locks, I will suggest we go slow and carefully evaluate the performance implication before we make the changes. Cheers, Longman
Re: Kernel panic when loading the IDE controller driver
Le 13/02/2019 à 13:53, sgosavi1 a écrit : Why using 4.15.13 which is obsolete instead of using one of the Long Term Support versions which are still maintained, like 4.14 or 4.19 ? (see the complete list at https://www.kernel.org/category/releases.html) Well, when I started this task 4.15.13 was probably the latest stable release and hence we decided to port this version. In the older kernel, we have the m8260_setup.c source file for our board where the function "io_block_mapping" was used to configure the non-standard IO port address starting at 0xe000 location. This address was passed as the base address followed by control address and IRQ number to the ide-core.ko module. In the new kernel we do not have an option to send these addresses and IRQ numbers as arguments to the driver. Instead the ide-generic.c source file in the new kernel uses the standard IO port values and IRQ values. I modified the code in the above file to used the addresses and IRQ number we used in the past. Also, added code in the "MMU_init" function call available under arch/PowerPC/init_32.c to setup the IO port address range by adding the "io_block_mapping" call and the required IO port address range. Is there anything else that needs to be added or how can we configure the desired IO address range in the new kernel? Maybe look around https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=9a0e77f28b50128df0c9e26ae489e44e29a7270a Also look at ide_platform.c. I imagine there must be some way to set it up in your device tree. Maybe Bartlomiej Zolnierkiewicz can help ? Christophe
Re: [PATCH v3 2/2] mm: be more verbose about zonelist initialization
On Wed 13-02-19 14:11:31, Peter Zijlstra wrote: > On Wed, Feb 13, 2019 at 12:50:14PM +0100, Michal Hocko wrote: > > On Wed 13-02-19 11:32:31, Peter Zijlstra wrote: > > > On Wed, Feb 13, 2019 at 10:43:15AM +0100, Michal Hocko wrote: > > > > @@ -5259,6 +5261,11 @@ static void build_zonelists(pg_data_t *pgdat) > > > > > > > > build_zonelists_in_node_order(pgdat, node_order, nr_nodes); > > > > build_thisnode_zonelists(pgdat); > > > > + > > > > + pr_info("node[%d] zonelist: ", pgdat->node_id); > > > > + for_each_zone_zonelist(zone, z, > > > > >node_zonelists[ZONELIST_FALLBACK], MAX_NR_ZONES-1) > > > > + pr_cont("%d:%s ", zone_to_nid(zone), zone->name); > > > > + pr_cont("\n"); > > > > } > > > > > > Have you ran this by the SGI and other stupid large machine vendors? > > > > I do not have such a large machine handy. The biggest I have has > > handfull (say dozen) of NUMA nodes. > > > > > Traditionally they tend to want to remove such things instead of adding > > > them. > > > > I do not insist on this patch but I find it handy. If there is an > > opposition I will not miss it much. > > Well, I don't have machines like that either and don't mind the patch. > Just raising the issue; I've had the big iron boys complain about > similar things (typically printing something for every CPU, which gets > out of hand much faster than zones, but still). Maybe we can try to push this through and revert if somebody complains about an excessive output. -- Michal Hocko SUSE Labs
Re: [PATCH v3 2/2] drivers/mtd: Fix device registration error
Subject prefix should be "mtd: powernv_flash: " On Mon, 11 Feb 2019 19:03:38 +0530 "Aneesh Kumar K.V" wrote: > This change helps me to get multiple mtd device registered. Without this > I get > > sysfs: cannot create duplicate filename '/bus/nvmem/devices/flash0' > CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.0.0-rc2-00557-g1ef20ef21f22 #13 > Call Trace: > [c000b38e3220] [c0b58fe4] dump_stack+0xe8/0x164 (unreliable) > [c000b38e3270] [c04cf074] sysfs_warn_dup+0x84/0xb0 > [c000b38e32f0] [c04cf6c4] > sysfs_do_create_link_sd.isra.0+0x114/0x150 > [c000b38e3340] [c0726a84] bus_add_device+0x94/0x1e0 > [c000b38e33c0] [c07218f0] device_add+0x4d0/0x830 > [c000b38e3480] [c09d54a8] nvmem_register.part.2+0x1c8/0xb30 > [c000b38e3560] [c0834530] mtd_nvmem_add+0x90/0x120 > [c000b38e3650] [c0835bc8] add_mtd_device+0x198/0x4e0 > [c000b38e36f0] [c083619c] mtd_device_parse_register+0x11c/0x280 > [c000b38e3780] [c0840830] powernv_flash_probe+0x180/0x250 > [c000b38e3820] [c072c120] platform_drv_probe+0x60/0xf0 > [c000b38e38a0] [c07283c8] really_probe+0x138/0x4d0 > [c000b38e3930] [c0728acc] driver_probe_device+0x13c/0x1b0 > [c000b38e39b0] [c0728c7c] __driver_attach+0x13c/0x1c0 > [c000b38e3a30] [c0725130] bus_for_each_dev+0xa0/0x120 > [c000b38e3a90] [c0727b2c] driver_attach+0x2c/0x40 > [c000b38e3ab0] [c07270f8] bus_add_driver+0x228/0x360 > [c000b38e3b40] [c072a2e0] driver_register+0x90/0x1a0 > [c000b38e3bb0] [c072c020] __platform_driver_register+0x50/0x70 > [c000b38e3bd0] [c105c984] powernv_flash_driver_init+0x24/0x38 > [c000b38e3bf0] [c0010904] do_one_initcall+0x84/0x464 > [c000b38e3cd0] [c1004548] kernel_init_freeable+0x530/0x634 > [c000b38e3db0] [c0011154] kernel_init+0x1c/0x168 > [c000b38e3e20] [c000bed4] ret_from_kernel_thread+0x5c/0x68 > mtd mtd1: Failed to register NVMEM device > > With the change we now have > > root@(none):/sys/bus/nvmem/devices# ls -al > total 0 > drwxr-xr-x 2 root root 0 Feb 6 20:49 . > drwxr-xr-x 4 root root 0 Feb 6 20:49 .. 
> lrwxrwxrwx 1 root root 0 Feb 6 20:49 flash@0 -> > ../../../devices/platform/ibm,opal:flash@0/mtd/mtd0/flash@0 > lrwxrwxrwx 1 root root 0 Feb 6 20:49 flash@1 -> > ../../../devices/platform/ibm,opal:flash@1/mtd/mtd1/flash@1 > > Fixes: acfe63ec1c59 ("mtd: Convert to using %pOFn instead of > device_node.name") Actually it's not this commit that is at fault as mtd->name was already given the value of device_node->name before that. I think you're actually fixing 1cbb4a1c433a ("mtd: powernv: Add powernv flash MTD abstraction driver"). No need to send a new version, I can fix that when applying, just let me know if you're okay with the changes I suggested. > Signed-off-by: Aneesh Kumar K.V > --- > drivers/mtd/devices/powernv_flash.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/mtd/devices/powernv_flash.c > b/drivers/mtd/devices/powernv_flash.c > index 22f753e555ac..83f88b8b5d9f 100644 > --- a/drivers/mtd/devices/powernv_flash.c > +++ b/drivers/mtd/devices/powernv_flash.c > @@ -212,7 +212,7 @@ static int powernv_flash_set_driver_info(struct device > *dev, >* Going to have to check what details I need to set and how to >* get them >*/ > - mtd->name = devm_kasprintf(dev, GFP_KERNEL, "%pOFn", dev->of_node); > + mtd->name = devm_kasprintf(dev, GFP_KERNEL, "%pOFP", dev->of_node); > mtd->type = MTD_NORFLASH; > mtd->flags = MTD_WRITEABLE; > mtd->size = size;
Re: [PATCH v3 2/2] mm: be more verbose about zonelist initialization
On Wed, Feb 13, 2019 at 12:50:14PM +0100, Michal Hocko wrote: > On Wed 13-02-19 11:32:31, Peter Zijlstra wrote: > > On Wed, Feb 13, 2019 at 10:43:15AM +0100, Michal Hocko wrote: > > > @@ -5259,6 +5261,11 @@ static void build_zonelists(pg_data_t *pgdat) > > > > > > build_zonelists_in_node_order(pgdat, node_order, nr_nodes); > > > build_thisnode_zonelists(pgdat); > > > + > > > + pr_info("node[%d] zonelist: ", pgdat->node_id); > > > + for_each_zone_zonelist(zone, z, > > > >node_zonelists[ZONELIST_FALLBACK], MAX_NR_ZONES-1) > > > + pr_cont("%d:%s ", zone_to_nid(zone), zone->name); > > > + pr_cont("\n"); > > > } > > > > Have you ran this by the SGI and other stupid large machine vendors? > > I do not have such a large machine handy. The biggest I have has > handfull (say dozen) of NUMA nodes. > > > Traditionally they tend to want to remove such things instead of adding > > them. > > I do not insist on this patch but I find it handy. If there is an > opposition I will not miss it much. Well, I don't have machines like that either and don't mind the patch. Just raising the issue; I've had the big iron boys complain about similar things (typically printing something for every CPU, which gets out of hand much faster than zones, but still).
Re: Kernel panic when loading the IDE controller driver
> Why using 4.15.13 which is obsolete instead of using one of the Long > Term Support versions which are still maintained, like 4.14 or 4.19 ? > (see the complete list at https://www.kernel.org/category/releases.html) Well, when I started this task 4.15.13 was probably the latest stable release and hence we decided to port this version. In the older kernel, we have the m8260_setup.c source file for our board where the function "io_block_mapping" was used to configure the non-standard IO port address starting at 0xe000 location. This address was passed as the base address followed by control address and IRQ number to the ide-core.ko module. In the new kernel we do not have an option to send these addresses and IRQ numbers as arguments to the driver. Instead the ide-generic.c source file in the new kernel uses the standard IO port values and IRQ values. I modified the code in the above file to used the addresses and IRQ number we used in the past. Also, added code in the "MMU_init" function call available under arch/PowerPC/init_32.c to setup the IO port address range by adding the "io_block_mapping" call and the required IO port address range. Is there anything else that needs to be added or how can we configure the desired IO address range in the new kernel? Thanks, Sachin -- Sent from: http://linuxppc.10917.n7.nabble.com/linuxppc-dev-f3.html
Re: [PATCH] mmap.2: describe the 5level paging hack
Hi Jann, On Mon, Feb 11, 2019 at 05:36:53PM +0100, Jann Horn wrote: > The manpage is missing information about the compatibility hack for > 5-level paging that went in in 4.14, around commit ee00f4a32a76 ("x86/mm: > Allow userspace have mappings above 47-bit"). Add some information about > that. > > While I don't think any hardware supporting this is shipping yet (?), I > think it's useful to try to write a manpage for this API, partly to > figure out how usable that API actually is, and partly because when this > hardware does ship, it'd be nice if distro manpages had information about > how to use it. > > Signed-off-by: Jann Horn > --- > This patch goes on top of the patch "[PATCH] mmap.2: fix description of > treatment of the hint" that I just sent, but I'm not sending them in a > series because I want the first one to go in, and I think this one might > be a bit more controversial. > > It would be nice if the architecture maintainers and mm folks could have > a look at this and check that what I wrote is right - I only looked at > the source for this, I haven't tried it. > > man2/mmap.2 | 15 +++ > 1 file changed, 15 insertions(+) > > diff --git a/man2/mmap.2 b/man2/mmap.2 > index 8556bbfeb..977782fa8 100644 > --- a/man2/mmap.2 > +++ b/man2/mmap.2 > @@ -67,6 +67,8 @@ is NULL, > then the kernel chooses the (page-aligned) address > at which to create the mapping; > this is the most portable method of creating a new mapping. > +On Linux, in this case, the kernel may limit the maximum address that can be > +used for allocations to a legacy limit for compatibility reasons. > If > .I addr > is not NULL, > @@ -77,6 +79,19 @@ or equal to the value specified by > and attempt to create the mapping there. > If another mapping already exists there, the kernel picks a new > address, independent of the hint. 
> +However, if a hint above the architecture's legacy address limit is provided > +(on x86-64: above 0x7000, on arm64: above 0x1, on ppc64 > with > +book3s: above 0x7fff or 0x3fff, depending on page size), the > +kernel is permitted to allocate mappings beyond the architecture's legacy > +address limit. On arm64 we support 36-bit, 39-bit, 42-bit, 47-bit, 48-bit and 52-bit user virtual addresses, some of which also enforce a particular page size of 4k, 16k or 64k. With the exception of 52-bit, the user virtual address size is fixed at compile time and mmap() can allocate up to the maximum address size. When 52-bit virtual addressing is configured, we continue to allocate up to 48 bits unless either a hint is passed to mmap() as you describe, or CONFIG_ARM64_FORCE_52BIT=y (this is really intended as a debug option and is hidden behind EXPERT as well as being off by default). One thing that just occurred to me is that our ASLR code is probably pretty weak for addresses greater than 48 bits because I don't think it was updated when we added 52-bit support. I'll take a deeper look when I get some time. Will
Re: Kernel panic when loading the IDE controller driver
Le 13/02/2019 à 13:24, sgosavi1 a écrit : What is the last linux version known to work properly? We have used it successfully in the Linux-2.6.17.6 version. Oh, ok, there's a big gap between the two versions. Why using 4.15.13 which is obsolete instead of using one of the Long Term Support versions which are still maintained, like 4.14 or 4.19 ? (see the complete list at https://www.kernel.org/category/releases.html) Christophe Thanks, Sachin. -- Sent from: http://linuxppc.10917.n7.nabble.com/linuxppc-dev-f3.html
Re: Kernel panic when loading the IDE controller driver
> What is the last linux version known to work properly? We have used it successfully in the Linux-2.6.17.6 version. Thanks, Sachin. -- Sent from: http://linuxppc.10917.n7.nabble.com/linuxppc-dev-f3.html
Re: [PATCH v2 2/2] powerpc/8xx: Map a second 8M text page at startup when needed.
Le 21/01/2019 à 12:34, Christophe Leroy a écrit : Some debug setup like CONFIG_KASAN generate huge kernels with text size over the 8M limit. This patch maps a second 8M page when _einittext is over 8M. This is not enough for CONFIG_KASAN_INLINE. I'll send a v3 which maps up to 32M based on _einittext. Signed-off-by: Christophe Leroy --- v2: Using IS_ENABLED() instead of #ifdef in 8xx_mmu.c arch/powerpc/kernel/head_8xx.S | 27 +-- arch/powerpc/mm/8xx_mmu.c | 3 +++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 20cc816b3508..3b3b7846247f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -337,8 +337,8 @@ InstructionTLBMiss: rlwinm r10, r10, 16, 0xfff8 cmpli cr0, r10, PAGE_OFFSET@h #ifndef CONFIG_PIN_TLB_TEXT - /* It is assumed that kernel code fits into the first 8M page */ -0: cmpli cr7, r10, (PAGE_OFFSET + 0x080)@h + /* It is assumed that kernel code fits into the two first 8M pages */ +0: cmpli cr7, r10, (PAGE_OFFSET + 0x100)@h patch_site 0b, patch__itlbmiss_linmem_top #endif #endif @@ -908,6 +908,29 @@ initial_mmu: li r8, MI_BOOTINIT /* Create RPN for address 0 */ mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ + /* Map a second 8M page if needed */ + lis r9, _einittext@h + orisr9, r9, _einittext@l + cmpli cr0, r9, (PAGE_OFFSET + 0x800)@h Should be 0x80 here Christophe + blt 1f + +#ifdef CONFIG_PIN_TLB_TEXT + lis r8, MI_RSV4I@h + ori r8, r8, 0x1d00 + + mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ +#endif + + lis r8, (KERNELBASE + 0x80)@h /* Create vaddr for TLB */ + ori r8, r8, MI_EVALID /* Mark it valid */ + mtspr SPRN_MI_EPN, r8 + li r8, MI_PS8MEG /* Set 8M byte page */ + ori r8, r8, MI_SVALID /* Make it valid */ + mtspr SPRN_MI_TWC, r8 + li r8, MI_BOOTINIT /* Create RPN for address 0 */ + addis r8, r8, 0x80 + mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ +1: lis r8, MI_APG_INIT@h /* Set protection modes */ ori r8, r8, MI_APG_INIT@l mtspr 
SPRN_MI_AP, r8 diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 92b677faea8c..b5f6d794281d 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -112,6 +112,9 @@ unsigned long __init mmu_mapin_ram(unsigned long top) mmu_patch_cmp_limit(__itlbmiss_linmem_top, 0); } else { mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); + if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_patch_cmp_limit(__itlbmiss_linmem_top, + _ALIGN(__pa(_einittext), 8 << 20)); } mmu_patch_cmp_limit(__dtlbmiss_linmem_top, mapped);
Re: [PATCH v3 2/2] mm: be more verbose about zonelist initialization
On Wed 13-02-19 11:32:31, Peter Zijlstra wrote: > On Wed, Feb 13, 2019 at 10:43:15AM +0100, Michal Hocko wrote: > > @@ -5259,6 +5261,11 @@ static void build_zonelists(pg_data_t *pgdat) > > > > build_zonelists_in_node_order(pgdat, node_order, nr_nodes); > > build_thisnode_zonelists(pgdat); > > + > > + pr_info("node[%d] zonelist: ", pgdat->node_id); > > + for_each_zone_zonelist(zone, z, > > >node_zonelists[ZONELIST_FALLBACK], MAX_NR_ZONES-1) > > + pr_cont("%d:%s ", zone_to_nid(zone), zone->name); > > + pr_cont("\n"); > > } > > Have you ran this by the SGI and other stupid large machine vendors? I do not have such a large machine handy. The biggest I have has handfull (say dozen) of NUMA nodes. > Traditionally they tend to want to remove such things instead of adding > them. I do not insist on this patch but I find it handy. If there is an opposition I will not miss it much. -- Michal Hocko SUSE Labs
Re: Kernel panic when loading the IDE controller driver
Hi, Le 13/02/2019 à 12:01, sgosavi1 a écrit : Hi All, I have been working on porting Linux-4.15.13 kernel on our existing MPC8270 processor board. For this exercise, I have used pq2fads as a reference board, its associated device tree and used cuImage for building the kernel. What it the last linux version known to work properly ? Christophe I am facing an issue with the generic IDE flash controller driver in the new kernel source where it fails to detect the Flash controller connected to 2GB NAND flash available on the board. We have used non-standard IO port addresses for this driver in the older kernel and I have added the required code in the new kernel to setup the set of addresses that we need as IO ports. Also, modified the code in the drivers/ide/ source to use the non-standard IO port address. However, during boot up while inserting the module I continue to get the below errors. [4.116587] ide: forcing hda as a disk (3543/255/63) [4.184190] Probing IDE interface ide0... [4.226330] Machine check in kernel mode. [4.233809] Caused by (from SRR1=49030): [4.233826] Transfer error ack signal [4.249263] IN from bad port e00e at 004123ef I understand that this is an addressing issue but not sure exactly what am I missing to fix the problem. Can you provide me your inputs on debugging this issue? Thanks, Sachin. -- Sent from: http://linuxppc.10917.n7.nabble.com/linuxppc-dev-f3.html
Re: [PATCH] hugetlb: allow to free gigantic pages regardless of the configuration
On 1/17/19 7:39 PM, Alexandre Ghiti wrote: > From: Alexandre Ghiti > > On systems without CMA or (MEMORY_ISOLATION && COMPACTION) activated but > that support gigantic pages, boottime reserved gigantic pages can not be > freed at all. This patchs simply enables the possibility to hand back > those pages to memory allocator. > > This commit then renames gigantic_page_supported and > ARCH_HAS_GIGANTIC_PAGE to make them more accurate. Indeed, those values > being false does not mean that the system cannot use gigantic pages: it > just means that runtime allocation of gigantic pages is not supported, > one can still allocate boottime gigantic pages if the architecture supports > it. > > Signed-off-by: Alexandre Ghiti I'm fine with the change, but wonder if this can be structured better in a way which would remove the duplicated "if (MEMORY_ISOLATION && COMPACTION) || CMA" from all arches, as well as the duplicated gigantic_page_runtime_allocation_supported() something like: - "select ARCH_HAS_GIGANTIC_PAGE" has no conditions, it just says the arch can support them either at boottime or runtime (but runtime is usable only if other conditions are met) - gigantic_page_runtime_allocation_supported() is a function that returns true if ARCH_HAS_GIGANTIC_PAGE && ((MEMORY_ISOLATION && COMPACTION) || CMA) and there's a single instance, not per-arch. - code for freeing gigantic pages can probably still be conditional on ARCH_HAS_GIGANTIC_PAGE BTW I wanted also to do something about the "(MEMORY_ISOLATION && COMPACTION) || CMA" ugliness itself, i.e. put the common parts behind some new kconfig (COMPACTION_CORE ?) and expose it better to users, but I can take a stab on that once the above part is settled. Vlastimil
[PATCH] powerpc/mm/hash: Increase vmalloc space with hash translation mode
From: Michael Ellerman This patch updates the kernel none linear virtual map area size to 512TB with 64K page size and hash translation mode. We allocate one context for the vmalloc region and hence the max virtual area size is limited by the context map size (512TB for 64K and 64TB for 4K page size). This patch fixes boot failures with large amounts of system RAM where we need large vmalloc space to handle per cpu allocation. Signed-off-by: Michael Ellerman Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/include/asm/book3s/64/hash.h | 30 +++--- arch/powerpc/include/asm/book3s/64/radix.h | 5 +--- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 247aff9cc6ba..0a7b7d5bfa86 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -40,22 +40,34 @@ #else #define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE) #endif + /* - * Define the address range of the kernel non-linear virtual area + * Define the address range of the kernel non-linear virtual area. In contrast + * to the linear mapping, this is managed using the kernel page tables and then + * inserted into the hash page table to actually take effect, similarly to user + * mappings. */ #define H_KERN_VIRT_START ASM_CONST(0xD000) -#define H_KERN_VIRT_SIZE ASM_CONST(0x4000) /* 64T */ +/* + * Allow virtual mapping of one context size. 
+ * 512TB for 64K page size + * 64TB for 4K page size + */ +#define H_KERN_VIRT_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT) +/* + * 8TB IO mapping size + */ +#define H_KERN_IO_SIZE ASM_CONST(0x800) /* 8T */ /* - * The vmalloc space starts at the beginning of that region, and - * occupies half of it on hash CPUs and a quarter of it on Book3E - * (we keep a quarter for the virtual memmap) + * The vmalloc space starts at the beginning of the kernel non-linear virtual + * region, and occupies 504T (64K) or 56T (4K) */ -#define H_VMALLOC_STARTH_KERN_VIRT_START -#define H_VMALLOC_SIZE ASM_CONST(0x3800) /* 56T */ -#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) +#define H_VMALLOC_START H_KERN_VIRT_START +#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE) +#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) -#define H_KERN_IO_STARTH_VMALLOC_END +#define H_KERN_IO_START H_VMALLOC_END /* * Region IDs diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 7d1a3d1543fc..c7a0feaa1013 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -104,10 +104,7 @@ #define RADIX_VMALLOC_STARTRADIX_KERN_VIRT_START #define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2) #define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE) -/* - * Defines the address of the vmemap area, in its own region on - * hash table CPUs. - */ + #define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END) #define RADIX_KERN_IO_START(RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1)) -- 2.20.1
Kernel panic when loading the IDE controller driver
Hi All, I have been working on porting Linux-4.15.13 kernel on our existing MPC8270 processor board. For this exercise, I have used pq2fads as a reference board, its associated device tree and used cuImage for building the kernel. I am facing an issue with the generic IDE flash controller driver in the new kernel source where it fails to detect the Flash controller connected to 2GB NAND flash available on the board. We have used non-standard IO port addresses for this driver in the older kernel and I have added the required code in the new kernel to setup the set of addresses that we need as IO ports. Also, modified the code in the drivers/ide/ source to use the non-standard IO port address. However, during boot up while inserting the module I continue to get the below errors. [4.116587] ide: forcing hda as a disk (3543/255/63) [4.184190] Probing IDE interface ide0... [4.226330] Machine check in kernel mode. [4.233809] Caused by (from SRR1=49030): [4.233826] Transfer error ack signal [4.249263] IN from bad port e00e at 004123ef I understand that this is an addressing issue but not sure exactly what am I missing to fix the problem. Can you provide me your inputs on debugging this issue? Thanks, Sachin. -- Sent from: http://linuxppc.10917.n7.nabble.com/linuxppc-dev-f3.html
Re: [PATCH v3 2/2] mm: be more verbose about zonelist initialization
On Wed, Feb 13, 2019 at 10:43:15AM +0100, Michal Hocko wrote: > @@ -5259,6 +5261,11 @@ static void build_zonelists(pg_data_t *pgdat) > > build_zonelists_in_node_order(pgdat, node_order, nr_nodes); > build_thisnode_zonelists(pgdat); > + > + pr_info("node[%d] zonelist: ", pgdat->node_id); > + for_each_zone_zonelist(zone, z, > > &pgdat->node_zonelists[ZONELIST_FALLBACK], MAX_NR_ZONES-1) > + pr_cont("%d:%s ", zone_to_nid(zone), zone->name); > + pr_cont("\n"); > } Have you ran this by the SGI and other stupid large machine vendors? Traditionally they tend to want to remove such things instead of adding them.
[PATCH v3 2/2] mm: be more verbose about zonelist initialization
From: Michal Hocko We have seen several bugs where zonelists have not been initialized properly and it is not really straightforward to track those bugs down. One way to help a bit at least is to dump zonelists of each node when they are (re)initialized. Signed-off-by: Michal Hocko --- Sorry for spamming. I have screwed up amending the previous version. mm/page_alloc.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2e097f336126..52e54d16662a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5234,6 +5234,8 @@ static void build_zonelists(pg_data_t *pgdat) int node, load, nr_nodes = 0; nodemask_t used_mask; int local_node, prev_node; + struct zone *zone; + struct zoneref *z; /* NUMA-aware ordering of nodes */ local_node = pgdat->node_id; @@ -5259,6 +5261,11 @@ static void build_zonelists(pg_data_t *pgdat) build_zonelists_in_node_order(pgdat, node_order, nr_nodes); build_thisnode_zonelists(pgdat); + + pr_info("node[%d] zonelist: ", pgdat->node_id); + for_each_zone_zonelist(zone, z, &pgdat->node_zonelists[ZONELIST_FALLBACK], MAX_NR_ZONES-1) + pr_cont("%d:%s ", zone_to_nid(zone), zone->name); + pr_cont("\n"); } #ifdef CONFIG_HAVE_MEMORYLESS_NODES -- 2.20.1
[PATCH v2 2/2] mm: be more verbose about zonelist initialization
From: Michal Hocko We have seen several bugs where zonelists have not been initialized properly and it is not really straightforward to track those bugs down. One way to help a bit at least is to dump zonelists of each node when they are (re)initialized. Signed-off-by: Michal Hocko --- mm/page_alloc.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2e097f336126..02c843f0db4f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5234,6 +5234,7 @@ static void build_zonelists(pg_data_t *pgdat) int node, load, nr_nodes = 0; nodemask_t used_mask; int local_node, prev_node; + struct zone *zone; /* NUMA-aware ordering of nodes */ local_node = pgdat->node_id; @@ -5259,6 +5260,11 @@ static void build_zonelists(pg_data_t *pgdat) build_zonelists_in_node_order(pgdat, node_order, nr_nodes); build_thisnode_zonelists(pgdat); + + pr_info("node[%d] zonelist: ", pgdat->node_id); + for_each_zone_zonelist(zone, z, &pgdat->node_zonelists[ZONELIST_FALLBACK], MAX_NR_ZONES-1) + pr_cont("%d:%s ", zone_to_nid(zone), zone->name); + pr_cont("\n"); } #ifdef CONFIG_HAVE_MEMORYLESS_NODES -- 2.20.1
Re: [PATCH-tip 00/22] locking/rwsem: Rework rwsem-xadd & enable new rwsem features
Hi all, Kernel test robot reported a will-it-scale.per_thread_ops -64.1% regression on IVB-desktop for v4.20-rc1. The first bad commit is: 9bc8039e715da3b53dbac89525323a9f2f69b7b5, Yang Shi : mm: brk: downgrade mmap_sem to read when shrinking (https://lists.01.org/pipermail/lkp/2018-November/009335.html). = compiler/cpufreq_governor/kconfig/mode/nr_task/rootfs/tbox_group/test/testcase/ucode: gcc-7/performance/x86_64-rhel-7.2/thread/100%/debian-x86_64-2018-04-03.cgz/lkp-ivb-d01/brk1/will-it-scale/0x20 commit: 85a06835f6 ("mm: mremap: downgrade mmap_sem to read when shrinking") 9bc8039e71 ("mm: brk: downgrade mmap_sem to read when shrinking") 85a06835f6f1ba79 9bc8039e715da3b53dbac89525 -- %stddev %change %stddev \ |\ 196250 ± 8% -64.1% 70494will-it-scale.per_thread_ops 127330 ± 19% -98.0% 2525 ± 24% will-it-scale.time.involuntary_context_switches 727.50 ± 2% -77.0% 167.25 will-it-scale.time.percent_of_cpu_this_job_got 2141 ± 2% -77.6% 479.12will-it-scale.time.system_time 50.48 ± 7% -48.5% 25.98will-it-scale.time.user_time 34925294 ± 18%+270.3% 1.293e+08 ± 4% will-it-scale.time.voluntary_context_switches 1570007 ± 8% -64.1% 563958will-it-scale.workload 6435 ± 2% -6.4% 6024proc-vmstat.nr_shmem 1298 ± 16% -44.5% 721.00 ± 18% proc-vmstat.pgactivate 2341 +16.4% 2724slabinfo.kmalloc-96.active_objs 2341 +16.4% 2724slabinfo.kmalloc-96.num_objs 6346 ±150% -87.8% 776.25 ± 9% softirqs.NET_RX 160107 ± 8%+151.9% 403273softirqs.SCHED 1097999 -13.0% 955526softirqs.TIMER 5.50 ± 9% -81.8% 1.00vmstat.procs.r 230700 ± 19%+269.9% 853292 ± 4% vmstat.system.cs 26706 ± 3% +15.7% 30910 ± 5% vmstat.system.in 11.24 ± 23% +72.2 83.39mpstat.cpu.idle% 0.00 ±131% +0.00.04 ± 99% mpstat.cpu.iowait% 86.32 ± 2% -70.8 15.54mpstat.cpu.sys% 2.44 ± 7% -1.41.04 ± 8% mpstat.cpu.usr% 20610709 ± 15% +2376.0% 5.103e+08 ± 34% cpuidle.C1.time 3233399 ± 8%+241.5% 11042785 ± 25% cpuidle.C1.usage 36172040 ± 6%+931.3% 3.73e+08 ± 15% cpuidle.C1E.time 783605 ± 4%+548.7%5083041 ± 18% cpuidle.C1E.usage 28753819 ± 39% 
+1054.5% 3.319e+08 ± 49% cpuidle.C3.time 283912 ± 25%+688.4%2238225 ± 34% cpuidle.C3.usage 1.507e+08 ± 47%+292.3% 5.913e+08 ± 28% cpuidle.C6.time 339861 ± 37%+549.7%2208222 ± 24% cpuidle.C6.usage 2709719 ± 5%+824.2% 25043444cpuidle.POLL.time 28602864 ± 18%+173.7% 78276116 ± 10% cpuidle.POLL.usage We found that the patchset could fix the regression. tests: 1 testcase/path_params/tbox_group/run: will-it-scale/performance-thread-100%-brk1-ucode=0x20/lkp-ivb-d01 commit: 85a06835f6 ("mm: mremap: downgrade mmap_sem to read when shrinking") fb835fe7f0 ("locking/rwsem: Ensure an RT task will not spin on reader") 85a06835f6f1ba79 fb835fe7f0adbd7c2c074b98ec -- %stddev change %stddev \ |\ 120736 ± 22%56% 188019 ± 6% will-it-scale.time.involuntary_context_switches 2126 ± 3% 4% 2215will-it-scale.time.system_time 722 ± 3% 4%752 will-it-scale.time.percent_of_cpu_this_job_got 36256485 ± 27% -35% 23682989 ± 3% will-it-scale.time.voluntary_context_switches 3151 ± 9%11% 3504turbostat.Avg_MHz 229285 ± 32% -30% 160660 ± 3% vmstat.system.cs 120736 ± 22%56% 188019 ± 6% time.involuntary_context_switches 2126 ± 3% 4% 2215time.system_time 722 ± 3% 4%752time.percent_of_cpu_this_job_got 36256485 ± 27% -35% 23682989 ± 3% time.voluntary_context_switches 23 643%171 ± 3% proc-vmstat.nr_zone_inactive_file 23 643%171 ± 3% proc-vmstat.nr_inactive_file 3664 12% 4121proc-vmstat.nr_kernel_stack 6392 6% 6785proc-vmstat.nr_slab_unreclaimable 9991 10176proc-vmstat.nr_slab_reclaimable 63938 62394proc-vmstat.nr_zone_active_anon 63938 62394proc-vmstat.nr_active_anon 386388 ± 9%-6% 362272proc-vmstat.pgfree 368296 ± 9% -10% 333074
Re: [PATCH v2 2/2] locking/rwsem: Optimize down_read_trylock()
* Waiman Long wrote: > I looked at the assembly code in arch/x86/include/asm/rwsem.h. For both > trylocks (read & write), the count is read first before attempting to > lock it. We did the same for all trylock functions in other locks. > Depending on how the trylock is used and how contended the lock is, it > may help or hurt performance. Changing down_read_trylock to do an > unconditional cmpxchg will change the performance profile of existing > code. So I would prefer keeping the current code. > > I do notice now that the generic down_write_trylock() code is doing an > unconditional compxchg. So I wonder if we should change it to read the > lock first like other trylocks or just leave it as it is. No, I think we should instead move the other trylocks to the try-for-ownership model as well, like Linus suggested. That's the general assumption we make in locking primitives, that we optimize for the common, expected case - which would be that the trylock succeeds, and I don't see why trylock primitives should be different. In fact I can see more ways for read-for-sharing to perform suboptimally on larger systems. Thanks, Ingo
Re: [PATCH 01/12] mfd/sm501: depend on HAS_DMA
On Mon, 11 Feb 2019, Christoph Hellwig wrote: > Currently the sm501 mfd driver can be compiled without any dependencies, > but through the use of dma_declare_coherent it really depends on > having DMA and iomem support. Normally we don't explicitly require DMA > support as we have stubs for it if on UML, but in this case the driver > selects support for dma_declare_coherent and thus also requires > memmap support. Guard this by an explicit dependency. > > Signed-off-by: Christoph Hellwig > --- > drivers/mfd/Kconfig | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig > index f461460a2aeb..f15f6489803d 100644 > --- a/drivers/mfd/Kconfig > +++ b/drivers/mfd/Kconfig > @@ -1066,6 +1066,7 @@ config MFD_SI476X_CORE > > config MFD_SM501 > tristate "Silicon Motion SM501" > + depends on HAS_DMA >---help--- > This is the core driver for the Silicon Motion SM501 multimedia > companion chip. This device is a multifunction device which may I would normally have taken this, but I fear it will conflict with [PATCH 06/12]. For that reason, just take my: Acked-by: Lee Jones -- Lee Jones [李琼斯] Linaro Services Technical Lead Linaro.org │ Open source software for ARM SoCs Follow Linaro: Facebook | Twitter | Blog
Re: [PATCH 06/12] dma-mapping: improve selection of dma_declare_coherent availability
On Mon, 11 Feb 2019, Christoph Hellwig wrote: > This API is primarily used through DT entries, but two architectures > and two drivers call it directly. So instead of selecting the config > symbol for random architectures pull it in implicitly for the actual > users. Also rename the Kconfig option to describe the feature better. > > Signed-off-by: Christoph Hellwig > --- > arch/arc/Kconfig| 1 - > arch/arm/Kconfig| 2 +- > arch/arm64/Kconfig | 1 - > arch/csky/Kconfig | 1 - > arch/mips/Kconfig | 1 - > arch/riscv/Kconfig | 1 - > arch/sh/Kconfig | 2 +- > arch/unicore32/Kconfig | 1 - > arch/x86/Kconfig| 1 - > drivers/mfd/Kconfig | 2 ++ If everyone else is happy with these changes, then so am I. Acked-by: Lee Jones > drivers/of/Kconfig | 3 ++- > include/linux/device.h | 2 +- > include/linux/dma-mapping.h | 8 > kernel/dma/Kconfig | 2 +- > kernel/dma/Makefile | 2 +- > 15 files changed, 13 insertions(+), 17 deletions(-) -- Lee Jones [李琼斯] Linaro Services Technical Lead Linaro.org │ Open source software for ARM SoCs Follow Linaro: Facebook | Twitter | Blog
[PATCH 32/32] powerpc/dma: trim the fat from
There is no need to provide anything but get_arch_dma_ops to . More the remaining declarations to and drop all the includes. Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky --- arch/powerpc/include/asm/dma-mapping.h| 29 --- arch/powerpc/include/asm/iommu.h | 10 +++ arch/powerpc/platforms/44x/ppc476.c | 1 + arch/powerpc/platforms/85xx/corenet_generic.c | 1 + arch/powerpc/platforms/85xx/qemu_e500.c | 1 + arch/powerpc/sysdev/fsl_pci.c | 1 + 6 files changed, 14 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index a59c42879194..565d6f74b189 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -1,37 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2004 IBM - * - * Implements the generic device dma API for powerpc. - * the pci and vio busses */ #ifndef _ASM_DMA_MAPPING_H #define _ASM_DMA_MAPPING_H -#ifdef __KERNEL__ - -#include -#include -/* need struct page definitions */ -#include -#include -#include -#include -#include - -static inline unsigned long device_to_mask(struct device *dev) -{ - if (dev->dma_mask && *dev->dma_mask) - return *dev->dma_mask; - /* Assume devices without mask can take 32 bit addresses */ - return 0xul; -} - -/* - * Available generic sets of operations - */ -#ifdef CONFIG_PPC64 -extern const struct dma_map_ops dma_iommu_ops; -#endif static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { @@ -43,5 +15,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return NULL; } -#endif /* __KERNEL__ */ #endif /* _ASM_DMA_MAPPING_H */ diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 6f00a892ebdf..0ac52392ed99 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -325,5 +325,15 @@ extern bool iommu_fixed_is_weak; #define iommu_fixed_is_weak false #endif +extern const struct 
dma_map_ops dma_iommu_ops; + +static inline unsigned long device_to_mask(struct device *dev) +{ + if (dev->dma_mask && *dev->dma_mask) + return *dev->dma_mask; + /* Assume devices without mask can take 32 bit addresses */ + return 0xul; +} + #endif /* __KERNEL__ */ #endif /* _ASM_IOMMU_H */ diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index e55933f9cd55..a5e61e5c16e2 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 808da1e9c0a7..785e9641220d 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index 27631c607f3d..c52c8f9e8385 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include "smp.h" diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index a04c6dde6ed0..f49aec251a5a 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include -- 2.20.1