Re: [PATCH 2/2] PCI/AER: Unexport pci_enable_pcie_error_reporting()
Looks good: Reviewed-by: Christoph Hellwig
Re: [PATCH 1/2] PCI/AER: Drop unused pci_disable_pcie_error_reporting()
Looks good: Reviewed-by: Christoph Hellwig
[PATCH v3 6/7] dax/kmem: Always enroll hotplugged memory for memmap_on_memory
From: Vishal Verma With DAX memory regions originating from CXL memory expanders or NVDIMMs, the kmem driver may be hot-adding huge amounts of system memory on a system without enough 'regular' main memory to support the memmap for it. To avoid this, ensure that all kmem managed hotplugged memory is added with the MHP_MEMMAP_ON_MEMORY flag to place the memmap on the new memory region being hot added. To do this, call add_memory() in chunks of memory_block_size_bytes() as that is a requirement for memmap_on_memory. Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Andrew Morton Cc: David Hildenbrand Cc: Oscar Salvador Cc: Dan Williams Cc: Dave Jiang Cc: Dave Hansen Cc: Huang Ying Signed-off-by: Vishal Verma Signed-off-by: Aneesh Kumar K.V --- drivers/dax/kmem.c | 81 +- 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index 898ca9505754..840bf7b40a44 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "dax-private.h" #include "bus.h" @@ -105,6 +106,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) data->mgid = rc; for (i = 0; i < dev_dax->nr_range; i++) { + u64 cur_start, cur_len, remaining; struct resource *res; struct range range; @@ -137,21 +139,42 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) res->flags = IORESOURCE_SYSTEM_RAM; /* -* Ensure that future kexec'd kernels will not treat -* this as RAM automatically. +* Add memory in chunks of memory_block_size_bytes() so that +* it is considered for MHP_MEMMAP_ON_MEMORY +* @range has already been aligned to memory_block_size_bytes(), +* so the following loop will always break it down cleanly. 
*/ - rc = add_memory_driver_managed(data->mgid, range.start, - range_len(), kmem_name, MHP_NID_IS_MGID); - - if (rc) { - dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n", - i, range.start, range.end); - remove_resource(res); - kfree(res); - data->res[i] = NULL; - if (mapped) - continue; - goto err_request_mem; + cur_start = range.start; + cur_len = memory_block_size_bytes(); + remaining = range_len(); + while (remaining) { + /* +* If alignment rules are not satisified we will +* fallback normal memmap allocation. +*/ + mhp_t mhp_flags = MHP_NID_IS_MGID | MHP_MEMMAP_ON_MEMORY; + /* +* Ensure that future kexec'd kernels will not treat +* this as RAM automatically. +*/ + rc = add_memory_driver_managed(data->mgid, cur_start, + cur_len, kmem_name, + mhp_flags); + + if (rc) { + dev_warn(dev, +"mapping%d: %#llx-%#llx memory add failed\n", +i, cur_start, cur_start + cur_len - 1); + remove_resource(res); + kfree(res); + data->res[i] = NULL; + if (mapped) + continue; + goto err_request_mem; + } + + cur_start += cur_len; + remaining -= cur_len; } mapped++; } @@ -186,25 +209,39 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax) * unbind will succeed even if we return failure. */ for (i = 0; i < dev_dax->nr_range; i++) { + + u64 cur_start, cur_len, remaining; struct range range; + bool resource_remove; int rc; rc = dax_kmem_range(dev_dax, i, ); if (rc) continue; - rc = remove_memory(range.start, range_len()); - if (rc == 0) { + resource_remove = true; + cur_start = range.start; + cur_len = memory_block_size_bytes(); + remaining = range_len(); + while (remaining) { + + rc = remove_memory(cur_start, cur_len); +
[PATCH v3 7/7] mm/hotplug: Embed vmem_altmap details in memory block
With memmap on memory, some architecture needs more details w.r.t altmap such as base_pfn, end_pfn, etc to unmap vmemmap memory. Instead of computing them again when we remove a memory block embed vmem_altmap details in struct memory_block if we are using memmap on memory block feature. No functional change in this patch Signed-off-by: Aneesh Kumar K.V --- drivers/base/memory.c | 35 ++- include/linux/memory.h | 8 ++-- mm/memory_hotplug.c| 34 ++ 3 files changed, 42 insertions(+), 35 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index b456ac213610..10aacaecf8de 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -106,6 +106,10 @@ static void memory_block_release(struct device *dev) { struct memory_block *mem = to_memory_block(dev); + if (mem->altmap) { + WARN(mem->altmap->alloc, "Altmap not fully unmapped"); + kfree(mem->altmap); + } kfree(mem); } @@ -183,7 +187,7 @@ static int memory_block_online(struct memory_block *mem) { unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages; + unsigned long nr_vmemmap_pages = 0; struct zone *zone; int ret; @@ -200,6 +204,9 @@ static int memory_block_online(struct memory_block *mem) * stage helps to keep accounting easier to follow - e.g vmemmaps * belong to the same zone as the memory they backed. 
*/ + if (mem->altmap) + nr_vmemmap_pages = mem->altmap->alloc + mem->altmap->reserve; + if (nr_vmemmap_pages) { ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone); if (ret) @@ -230,7 +237,7 @@ static int memory_block_offline(struct memory_block *mem) { unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages; + unsigned long nr_vmemmap_pages = 0; int ret; if (!mem->zone) @@ -240,6 +247,9 @@ static int memory_block_offline(struct memory_block *mem) * Unaccount before offlining, such that unpopulated zone and kthreads * can properly be torn down in offline_pages(). */ + if (mem->altmap) + nr_vmemmap_pages = mem->altmap->alloc + mem->altmap->reserve; + if (nr_vmemmap_pages) adjust_present_page_count(pfn_to_page(start_pfn), mem->group, -nr_vmemmap_pages); @@ -726,7 +736,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid, #endif static int add_memory_block(unsigned long block_id, unsigned long state, - unsigned long nr_vmemmap_pages, + struct vmem_altmap *altmap, struct memory_group *group) { struct memory_block *mem; @@ -744,7 +754,14 @@ static int add_memory_block(unsigned long block_id, unsigned long state, mem->start_section_nr = block_id * sections_per_block; mem->state = state; mem->nid = NUMA_NO_NODE; - mem->nr_vmemmap_pages = nr_vmemmap_pages; + if (altmap) { + mem->altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL); + if (!mem->altmap) { + kfree(mem); + return -ENOMEM; + } + memcpy(mem->altmap, altmap, sizeof(*altmap)); + } INIT_LIST_HEAD(>group_next); #ifndef CONFIG_NUMA @@ -783,14 +800,14 @@ static int __init add_boot_memory_block(unsigned long base_section_nr) if (section_count == 0) return 0; return add_memory_block(memory_block_id(base_section_nr), - MEM_ONLINE, 0, NULL); + MEM_ONLINE, NULL, NULL); } static int add_hotplug_memory_block(unsigned long block_id, - unsigned long 
nr_vmemmap_pages, + struct vmem_altmap *altmap, struct memory_group *group) { - return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group); + return add_memory_block(block_id, MEM_OFFLINE, altmap, group); } static void remove_memory_block(struct memory_block *memory) @@ -818,7 +835,7 @@ static void remove_memory_block(struct memory_block *memory) * Called under device_hotplug_lock. */ int create_memory_block_devices(unsigned long start, unsigned long size, - unsigned long vmemmap_pages, + struct vmem_altmap *altmap, struct memory_group *group) { const
[PATCH v3 5/7] powerpc/book3s64/memhotplug: Enable memmap on memory for radix
Radix vmemmap mapping can map things correctly at the PMD level or PTE level based on different device boundary checks. Hence we skip the restrictions w.r.t vmemmap size to be multiple of PMD_SIZE. This also makes the feature widely useful because to use PMD_SIZE vmemmap area we require a memory block size of 2GiB. We can also use MHP_RESERVE_PAGES_MEMMAP_ON_MEMORY so that the feature can work with a memory block size of 256MiB. This uses the altmap.reserve feature to align things correctly at pageblock granularity. We can end up losing some pages in memory with this. For ex: with a 256MiB memory block size, we require 4 pages to map vmemmap pages. In order to align things correctly we end up adding a reserve of 28 pages, i.e., for every 4096 pages 28 pages get reserved. Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/pgtable.h| 28 +++ .../platforms/pseries/hotplug-memory.c| 3 +- mm/memory_hotplug.c | 2 ++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 116d6add0bb0..f890907e5bbf 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -157,6 +157,7 @@ config PPC select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_KEEP_MEMBLOCK + select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if PPC_RADIX_MMU select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 68817ea7f994..8e6c92dde6ad 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -169,6 +169,34 @@ static inline bool is_ioremap_addr(const void *x) int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size); bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start, unsigned long page_size); +/* + * mm/memory_hotplug.c:mhp_supports_memmap_on_memory goes into details 
+ * some of the restrictions. We don't check for PMD_SIZE because our + * vmemmap allocation code can fallback correctly. The pageblock + * alignment requirement is met using altmap->reserve blocks. + */ +#define arch_supports_memmap_on_memory arch_supports_memmap_on_memory +static inline bool arch_supports_memmap_on_memory(unsigned long size) +{ + unsigned long nr_pages = size >> PAGE_SHIFT; + unsigned long vmemmap_size = nr_pages * sizeof(struct page); + + if (!radix_enabled()) + return false; + +#ifdef CONFIG_PPC_4K_PAGES + return IS_ALIGNED(vmemmap_size, PMD_SIZE); +#else + /* +* Make sure the vmemmap allocation is fully contianed +* so that we always allocate vmemmap memory from altmap area. +* The pageblock alignment requirement is met by using +* reserve blocks in altmap. +*/ + return IS_ALIGNED(vmemmap_size, PAGE_SIZE); +#endif +} + #endif /* CONFIG_PPC64 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 9c62c2c3b3d0..1447509357a7 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -617,6 +617,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) static int dlpar_add_lmb(struct drmem_lmb *lmb) { + mhp_t mhp_flags = MHP_NONE | MHP_MEMMAP_ON_MEMORY; unsigned long block_sz; int nid, rc; @@ -637,7 +638,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) nid = first_online_node; /* Add the memory */ - rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_NONE); + rc = __add_memory(nid, lmb->base_addr, block_sz, mhp_flags); if (rc) { invalidate_lmb_associativity_index(lmb); return rc; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f36aec1f7626..0c4d3fdd31a2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -2108,6 +2108,8 @@ static int __ref try_remove_memory(u64 start, u64 size) * right thing if we used vmem_altmap when hot-adding * the range. 
*/ + mhp_altmap.base_pfn = PHYS_PFN(start); + mhp_altmap.free = PHYS_PFN(size) - nr_vmemmap_pages; mhp_altmap.alloc = nr_vmemmap_pages; altmap = _altmap; } -- 2.41.0
[PATCH v3 4/7] mm/hotplug: Allow pageblock alignment via altmap reservation
Add a new kconfig option that can be selected if we want to allow pageblock alignment by reserving pages in the vmemmap altmap area. This implies we will be reserving some pages for every memoryblock This also allows the memmap on memory feature to be widely useful with different memory block size values. Signed-off-by: Aneesh Kumar K.V --- mm/Kconfig | 9 +++ mm/memory_hotplug.c | 59 + 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index 932349271e28..88a1472b2086 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -570,6 +570,15 @@ config MHP_MEMMAP_ON_MEMORY depends on MEMORY_HOTPLUG && SPARSEMEM_VMEMMAP depends on ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE +config MHP_RESERVE_PAGES_MEMMAP_ON_MEMORY + bool "Allow Reserving pages for page block aligment" + depends on MHP_MEMMAP_ON_MEMORY + help + This option allows memmap on memory feature to be more useful + with different memory block sizes. This is achieved by marking some pages + in each memory block as reserved so that we can get page-block alignment + for the remaining pages. 
+ endif # MEMORY_HOTPLUG config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 07c99b0cc371..f36aec1f7626 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1252,15 +1252,17 @@ static inline bool arch_supports_memmap_on_memory(unsigned long size) { unsigned long nr_vmemmap_pages = size >> PAGE_SHIFT; unsigned long vmemmap_size = nr_vmemmap_pages * sizeof(struct page); - unsigned long remaining_size = size - vmemmap_size; - return IS_ALIGNED(vmemmap_size, PMD_SIZE) && - IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT)); + return IS_ALIGNED(vmemmap_size, PMD_SIZE); } #endif static bool mhp_supports_memmap_on_memory(unsigned long size) { + unsigned long nr_vmemmap_pages = size >> PAGE_SHIFT; + unsigned long vmemmap_size = nr_vmemmap_pages * sizeof(struct page); + unsigned long remaining_size = size - vmemmap_size; + /* * Besides having arch support and the feature enabled at runtime, we * need a few more assumptions to hold true: @@ -1287,9 +1289,30 @@ static bool mhp_supports_memmap_on_memory(unsigned long size) * altmap as an alternative source of memory, and we do not exactly * populate a single PMD. */ - return mhp_memmap_on_memory() && - size == memory_block_size_bytes() && - arch_supports_memmap_on_memory(size); + if (!mhp_memmap_on_memory() || size != memory_block_size_bytes()) + return false; +/* + * Without page reservation remaining pages should be pageblock aligned. 
+ */ + if (!IS_ENABLED(CONFIG_MHP_RESERVE_PAGES_MEMMAP_ON_MEMORY) && + !IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT))) + return false; + + return arch_supports_memmap_on_memory(size); +} + +static inline unsigned long memory_block_align_base(unsigned long size) +{ + if (IS_ENABLED(CONFIG_MHP_RESERVE_PAGES_MEMMAP_ON_MEMORY)) { + unsigned long align; + unsigned long nr_vmemmap_pages = size >> PAGE_SHIFT; + unsigned long vmemmap_size; + + vmemmap_size = (nr_vmemmap_pages * sizeof(struct page)) >> PAGE_SHIFT; + align = pageblock_align(vmemmap_size) - vmemmap_size; + return align; + } else + return 0; } /* @@ -1302,7 +1325,11 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) { struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) }; enum memblock_flags memblock_flags = MEMBLOCK_NONE; - struct vmem_altmap mhp_altmap = {}; + struct vmem_altmap mhp_altmap = { + .base_pfn = PHYS_PFN(res->start), + .end_pfn = PHYS_PFN(res->end), + .reserve = memory_block_align_base(resource_size(res)), + }; struct memory_group *group = NULL; u64 start, size; bool new_node = false; @@ -1347,8 +1374,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) */ if (mhp_flags & MHP_MEMMAP_ON_MEMORY) { if (mhp_supports_memmap_on_memory(size)) { - mhp_altmap.free = PHYS_PFN(size); - mhp_altmap.base_pfn = PHYS_PFN(start); + mhp_altmap.free = PHYS_PFN(size) - mhp_altmap.reserve; params.altmap = _altmap; } /* fallback to not using altmap */ @@ -1360,7 +1386,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) goto error; /* create memory block devices after memory
[PATCH v3 3/7] mm/hotplug: Allow architecture to override memmap on memory support check
Some architectures would want different restrictions. Hence add an architecture-specific override. Both the PMD_SIZE check and pageblock alignment check are moved there. Signed-off-by: Aneesh Kumar K.V --- mm/memory_hotplug.c | 17 - 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1b19462f4e72..07c99b0cc371 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1247,12 +1247,20 @@ static int online_memory_block(struct memory_block *mem, void *arg) return device_online(>dev); } -static bool mhp_supports_memmap_on_memory(unsigned long size) +#ifndef arch_supports_memmap_on_memory +static inline bool arch_supports_memmap_on_memory(unsigned long size) { - unsigned long nr_vmemmap_pages = size / PAGE_SIZE; + unsigned long nr_vmemmap_pages = size >> PAGE_SHIFT; unsigned long vmemmap_size = nr_vmemmap_pages * sizeof(struct page); unsigned long remaining_size = size - vmemmap_size; + return IS_ALIGNED(vmemmap_size, PMD_SIZE) && + IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT)); +} +#endif + +static bool mhp_supports_memmap_on_memory(unsigned long size) +{ /* * Besides having arch support and the feature enabled at runtime, we * need a few more assumptions to hold true: @@ -1280,9 +1288,8 @@ static bool mhp_supports_memmap_on_memory(unsigned long size) * populate a single PMD. */ return mhp_memmap_on_memory() && - size == memory_block_size_bytes() && - IS_ALIGNED(vmemmap_size, PMD_SIZE) && - IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT)); + size == memory_block_size_bytes() && + arch_supports_memmap_on_memory(size); } /* -- 2.41.0
[PATCH v3 2/7] mm/hotplug: Allow memmap on memory hotplug request to fallback
If not supported, fall back to not using memmap on memory. This avoids the need for callers to do the fallback. Signed-off-by: Aneesh Kumar K.V --- drivers/acpi/acpi_memhotplug.c | 3 +-- include/linux/memory_hotplug.h | 1 - mm/memory_hotplug.c| 13 ++--- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 24f662d8bd39..d0c1a71007d0 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -211,8 +211,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) if (!info->length) continue; - if (mhp_supports_memmap_on_memory(info->length)) - mhp_flags |= MHP_MEMMAP_ON_MEMORY; + mhp_flags |= MHP_MEMMAP_ON_MEMORY; result = __add_memory(mgid, info->start_addr, info->length, mhp_flags); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 013c69753c91..96f6127f197f 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -354,7 +354,6 @@ extern struct zone *zone_for_pfn_range(int online_type, int nid, extern int arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params); void arch_remove_linear_mapping(u64 start, u64 size); -extern bool mhp_supports_memmap_on_memory(unsigned long size); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3f231cf1b410..1b19462f4e72 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1247,7 +1247,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) return device_online(&mem->dev); } -bool mhp_supports_memmap_on_memory(unsigned long size) +static bool mhp_supports_memmap_on_memory(unsigned long size) { unsigned long nr_vmemmap_pages = size / PAGE_SIZE; unsigned long vmemmap_size = nr_vmemmap_pages * sizeof(struct page); @@ -1339,13 +1339,12 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) * Self 
hosted memmap array */ if (mhp_flags & MHP_MEMMAP_ON_MEMORY) { - if (!mhp_supports_memmap_on_memory(size)) { - ret = -EINVAL; - goto error; + if (mhp_supports_memmap_on_memory(size)) { + mhp_altmap.free = PHYS_PFN(size); + mhp_altmap.base_pfn = PHYS_PFN(start); + params.altmap = _altmap; } - mhp_altmap.free = PHYS_PFN(size); - mhp_altmap.base_pfn = PHYS_PFN(start); - params.altmap = _altmap; + /* fallback to not using altmap */ } /* call arch's memory hotadd */ -- 2.41.0
[PATCH v3 1/7] mm/hotplug: Simplify ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE kconfig
Instead of adding menu entry with all supported architectures, add mm/Kconfig variable and select the same from supported architectures. No functional change in this patch. Acked-by: David Hildenbrand Signed-off-by: Aneesh Kumar K.V --- arch/arm64/Kconfig | 4 +--- arch/x86/Kconfig | 4 +--- mm/Kconfig | 3 +++ 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7856c3a3e35a..7e5985c018f8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -78,6 +78,7 @@ config ARM64 select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION select ARCH_KEEP_MEMBLOCK + select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_GNU_PROPERTY select ARCH_USE_MEMTEST @@ -346,9 +347,6 @@ config GENERIC_CSUM config GENERIC_CALIBRATE_DELAY def_bool y -config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE - def_bool y - config SMP def_bool y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 78224aa76409..d0258e92a8af 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -102,6 +102,7 @@ config X86 select ARCH_HAS_DEBUG_WX select ARCH_HAS_ZONE_DMA_SET if EXPERT select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO @@ -2610,9 +2611,6 @@ config ARCH_HAS_ADD_PAGES def_bool y depends on ARCH_ENABLE_MEMORY_HOTPLUG -config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE - def_bool y - menu "Power management and ACPI options" config ARCH_HIBERNATION_HEADER diff --git a/mm/Kconfig b/mm/Kconfig index 923bd35f81f2..932349271e28 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -572,6 +572,9 @@ config MHP_MEMMAP_ON_MEMORY endif # MEMORY_HOTPLUG +config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE + bool + # Heavily threaded applications may benefit from splitting the mm-wide # page_table_lock, so that faults on different parts of the user address # space can be handled with less 
contention: split it at this NR_CPUS. -- 2.41.0
[PATCH v3 0/7] Add support for memmap on memory feature on ppc64
This patch series updates the memmap on memory feature to fall back to memmap allocation outside the memory block if the alignment rules are not met. This makes the feature more useful on architectures like ppc64 where alignment rules are different with 64K page size. This patch series depends on the dax vmemmap optimization series posted here https://lore.kernel.org/linux-mm/20230710160842.56300-1-aneesh.ku...@linux.ibm.com Changes from v2: * Rebase to latest linus tree * Redo the series based on review feedback. Multiple changes to the patchset. Changes from v1: * update the memblock to store vmemmap_altmap details. This is required so that when we remove the memory we can find the altmap details which is needed on some architectures. * rebase to latest linus tree Aneesh Kumar K.V (6): mm/hotplug: Simplify ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE kconfig mm/hotplug: Allow memmap on memory hotplug request to fallback mm/hotplug: Allow architecture to override memmap on memory support check mm/hotplug: Allow pageblock alignment via altmap reservation powerpc/book3s64/memhotplug: Enable memmap on memory for radix mm/hotplug: Embed vmem_altmap details in memory block Vishal Verma (1): dax/kmem: Always enroll hotplugged memory for memmap_on_memory arch/arm64/Kconfig| 4 +- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/pgtable.h| 28 + .../platforms/pseries/hotplug-memory.c| 3 +- arch/x86/Kconfig | 4 +- drivers/acpi/acpi_memhotplug.c| 3 +- drivers/base/memory.c | 35 -- drivers/dax/kmem.c| 81 ++ include/linux/memory.h| 8 +- include/linux/memory_hotplug.h| 1 - mm/Kconfig| 12 ++ mm/memory_hotplug.c | 103 -- 12 files changed, 205 insertions(+), 78 deletions(-) -- 2.41.0
Re: [PATCH v5 21/38] powerpc: Implement the new page table range API
Le 10/07/2023 à 22:43, Matthew Wilcox (Oracle) a écrit : > Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio(). > Change the PG_arch_1 (aka PG_dcache_dirty) flag from being per-page to > per-folio. > > Signed-off-by: Matthew Wilcox (Oracle) > Acked-by: Mike Rapoport (IBM) > Cc: Michael Ellerman > Cc: Nicholas Piggin > Cc: Christophe Leroy > Cc: linuxppc-dev@lists.ozlabs.org Reviewed-by: Christophe Leroy > --- > arch/powerpc/include/asm/book3s/32/pgtable.h | 5 -- > arch/powerpc/include/asm/book3s/64/pgtable.h | 6 +-- > arch/powerpc/include/asm/book3s/pgtable.h| 11 ++--- > arch/powerpc/include/asm/cacheflush.h| 14 -- > arch/powerpc/include/asm/kvm_ppc.h | 10 ++-- > arch/powerpc/include/asm/nohash/pgtable.h| 16 ++ > arch/powerpc/include/asm/pgtable.h | 12 + > arch/powerpc/mm/book3s64/hash_utils.c| 11 +++-- > arch/powerpc/mm/cacheflush.c | 40 +-- > arch/powerpc/mm/nohash/e500_hugetlbpage.c| 3 +- > arch/powerpc/mm/pgtable.c| 51 +++- > 11 files changed, 86 insertions(+), 93 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h > b/arch/powerpc/include/asm/book3s/32/pgtable.h > index 7bf1fe7297c6..5f12b9382909 100644 > --- a/arch/powerpc/include/asm/book3s/32/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h > @@ -462,11 +462,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t > pgprot) >pgprot_val(pgprot)); > } > > -static inline unsigned long pte_pfn(pte_t pte) > -{ > - return pte_val(pte) >> PTE_RPN_SHIFT; > -} > - > /* Generic modifiers for PTE bits */ > static inline pte_t pte_wrprotect(pte_t pte) > { > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h > b/arch/powerpc/include/asm/book3s/64/pgtable.h > index 4acc9690f599..c5baa3082a5a 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h > @@ -104,6 +104,7 @@ >* and every thing below PAGE_SHIFT; >*/ > #define PTE_RPN_MASK(((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK)) > +#define 
PTE_RPN_SHIFTPAGE_SHIFT > /* >* set of bits not changed in pmd_modify. Even though we have hash specific > bits >* in here, on radix we expect them to be zero. > @@ -569,11 +570,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t > pgprot) > return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | > _PAGE_PTE); > } > > -static inline unsigned long pte_pfn(pte_t pte) > -{ > - return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT; > -} > - > /* Generic modifiers for PTE bits */ > static inline pte_t pte_wrprotect(pte_t pte) > { > diff --git a/arch/powerpc/include/asm/book3s/pgtable.h > b/arch/powerpc/include/asm/book3s/pgtable.h > index d18b748ea3ae..3b7bd36a2321 100644 > --- a/arch/powerpc/include/asm/book3s/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/pgtable.h > @@ -9,13 +9,6 @@ > #endif > > #ifndef __ASSEMBLY__ > -/* Insert a PTE, top-level function is out of line. It uses an inline > - * low level function in the respective pgtable-* files > - */ > -extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, > -pte_t pte); > - > - > #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS > extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long > address, >pte_t *ptep, pte_t entry, int dirty); > @@ -36,7 +29,9 @@ void __update_mmu_cache(struct vm_area_struct *vma, > unsigned long address, pte_t >* corresponding HPTE into the hash table ahead of time, instead of >* waiting for the inevitable extra hash-table miss exception. 
>*/ > -static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned > long address, pte_t *ptep) > +static inline void update_mmu_cache_range(struct vm_fault *vmf, > + struct vm_area_struct *vma, unsigned long address, > + pte_t *ptep, unsigned int nr) > { > if (IS_ENABLED(CONFIG_PPC32) && !mmu_has_feature(MMU_FTR_HPTE_TABLE)) > return; > diff --git a/arch/powerpc/include/asm/cacheflush.h > b/arch/powerpc/include/asm/cacheflush.h > index 7564dd4fd12b..ef7d2de33b89 100644 > --- a/arch/powerpc/include/asm/cacheflush.h > +++ b/arch/powerpc/include/asm/cacheflush.h > @@ -35,13 +35,19 @@ static inline void flush_cache_vmap(unsigned long start, > unsigned long end) >* It just marks the page as not i-cache clean. We do the i-cache >* flush later when the page is given to a user process, if necessary. >*/ > -static inline void flush_dcache_page(struct page *page) > +static inline void flush_dcache_folio(struct folio *folio) > { > if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) > return; > /* avoid an atomic op if possible */ > - if (test_bit(PG_dcache_clean, >flags)) > -
Re: [PATCH v4 20/36] powerpc: Implement the new page table range API
Le 10/07/2023 à 22:24, Matthew Wilcox a écrit : > On Sat, Mar 18, 2023 at 09:19:04AM +, Christophe Leroy wrote: >> void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, >> pte_t pte, unsigned int nr) >> { >> pgprot_t prot; >> unsigned long pfn; >> /* >> * Make sure hardware valid bit is not set. We don't do >> * tlb flush for this update. >> */ >> VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); >> >> /* Note: mm->context.id might not yet have been assigned as >> * this context might not have been activated yet when this >> * is called. >> */ >> pte = set_pte_filter(pte); >> >> prot = pte_pgprot(pte); >> pfn = pte_pfn(pte); >> /* Perform the setting of the PTE */ >> for (;;) { >> __set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot), 0); >> if (--nr == 0) >> break; >> ptep++; >> pfn++; >> addr += PAGE_SIZE; >> } >> } > > I'd rather the per-arch code were as similar to each other and the > generic implementation as possible. Fewer bugs that way and easier > for other people to make changes that have to touch every architecture > in the future. I understand your point but I dislike the idea of open coding pte manipulations when you have helpers for that. If you had used helpers from the beginning you wouldn't have had the mishap you had in v4.
Re: [PATCH v4 04/13] mm/vmemmap: Allow architectures to override how vmemmap optimization works
Christophe Leroy writes: > Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit : >> Architectures like powerpc will like to use different page table allocators >> and mapping mechanisms to implement vmemmap optimization. Similar to >> vmemmap_populate allow architectures to implement >> vmemap_populate_compound_pages >> >> Signed-off-by: Aneesh Kumar K.V >> --- >> mm/sparse-vmemmap.c | 3 +++ >> 1 file changed, 3 insertions(+) >> >> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c >> index a044a130405b..541b3f69a481 100644 >> --- a/mm/sparse-vmemmap.c >> +++ b/mm/sparse-vmemmap.c >> @@ -141,6 +141,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node, >> start, end - 1); >> } >> >> +#ifndef vmemmap_populate_compound_pages >> pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int >> node, >> struct vmem_altmap *altmap, >> struct page *reuse) > > Should vmemmap_pte_populate() be static ? > > It looks odd to exclude a non-static function based on a non related macro. > > There are several such function in the block being excluded here. Can > you explain why it is correct to do that ? > Those functions can actually be made static. But I will do that as a part of different patch. I will update this patch and make sure the #ifdef will only override the vmemmap_populate_compound_pages. modified mm/sparse-vmemmap.c @@ -141,7 +141,6 @@ void __meminit vmemmap_verify(pte_t *pte, int node, start, end - 1); } -#ifndef vmemmap_populate_compound_pages pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, struct vmem_altmap *altmap, struct page *reuse) @@ -359,6 +358,7 @@ int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, return 0; } +#ifndef vmemmap_populate_compound_pages /* * For compound pages bigger than section size (e.g. x86 1G compound * pages with 2M subsection size) fill the rest of sections as tail -aneesh
Re: [PATCH v4 03/13] mm/vmemmap: Improve vmemmap_can_optimize and allow architectures to override
Christophe Leroy writes: > Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit : >> dax vmemmap optimization requires a minimum of 2 PAGE_SIZE area within >> vmemmap such that tail page mapping can point to the second PAGE_SIZE area. >> Enforce that in vmemmap_can_optimize() function. >> >> Architectures like powerpc also want to enable vmemmap optimization >> conditionally (only with radix MMU translation). Hence allow architecture >> override. >> >> Signed-off-by: Aneesh Kumar K.V > > Reviewed-by: Christophe Leroy > > Why renaming vmemmap_can_optimize() to __vmemmap_can_optimize() and keep > it when vmemmap_can_optimize() has been override ? Is that because you > expect overriding version of vmemmap_can_optimize() to call > __vmemmap_can_optimize() ? > Yes, __vmemmap_can_optimize will be used in patch 11 https://lore.kernel.org/linuxppc-dev/20230710160842.56300-12-aneesh.ku...@linux.ibm.com -aneesh
Re: [PATCH v4 05/13] mm: Add __HAVE_ARCH_PUD_SAME similar to __HAVE_ARCH_P4D_SAME
Christophe Leroy writes: > Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit : >> This helps architectures to override pmd_same and pud_same independently. >> >> Signed-off-by: Aneesh Kumar K.V > > Reviewed-by: Christophe Leroy > > Shouldn't you do it the modern way and use #ifndef pud_same instead of a > new __HAVE_ARCH_PUD_SAME like in the old days ? > Sure will update. I was following existing pmd_same override. But I also agree #ifndef pud_same is better. > >> --- >> include/linux/pgtable.h | 2 ++ >> 1 file changed, 2 insertions(+) >> >> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h >> index 6fd9b2831338..91def34f7784 100644 >> --- a/include/linux/pgtable.h >> +++ b/include/linux/pgtable.h >> @@ -693,7 +693,9 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) >> { >> return pmd_val(pmd_a) == pmd_val(pmd_b); >> } >> +#endif >> >> +#ifndef __HAVE_ARCH_PUD_SAME >> static inline int pud_same(pud_t pud_a, pud_t pud_b) >> { >> return pud_val(pud_a) == pud_val(pud_b); -aneesh
Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues
Hi Ming, Having no [PATCH 1/2] blk-mq: add blk_mq_max_nr_hw_queues() in inbox. So I reply here. At first glance, I think that the cpu hot plug callback hook should be the remedy for the newly introduced blk_mq_max_nr_hw_queues(), although it is more complicated. Consider the scene where nr_cpus=4, which can speed up the dumping process, the blk_mq_max_nr_hw_queues() can not utilize the other three cpus. Thanks, Pingfan On Mon, Jul 10, 2023 at 5:16 PM Ming Lei wrote: > > On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote: > > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote: > > > Take blk-mq's knowledge into account for calculating io queues. > > > > > > Fix wrong queue mapping in case of kdump kernel. > > > > > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see > > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus() > > > still returns all CPUs. > > > > That's simply broken. Please fix the arch code to make sure > > it does not return a bogus num_possible_cpus value for these > > That is documented in Documentation/admin-guide/kdump/kdump.rst. > > On arm and ppc64, 'maxcpus=1' is passed for kdump kernel, and "maxcpu=1" > simply keep one of CPU cores as online, and others as offline. > > So Cc our arch(arm & ppc64) & kdump guys wrt. passing 'maxcpus=1' for > kdump kernel. > > > setups, otherwise you'll have to paper over it in all kind of > > drivers. > > The issue is only triggered for drivers which use managed irq & > multiple hw queues. > > > Thanks, > Ming > > > ___ > kexec mailing list > ke...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/kexec >
Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues
On Mon, Jul 10, 2023 at 5:16 PM Ming Lei wrote: > > On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote: > > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote: > > > Take blk-mq's knowledge into account for calculating io queues. > > > > > > Fix wrong queue mapping in case of kdump kernel. > > > > > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see > > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus() > > > still returns all CPUs. > > > > That's simply broken. Please fix the arch code to make sure > > it does not return a bogus num_possible_cpus value for these > In fact, num_possible_cpus is not broken. Quote from admin-guide/kernel-parameters.txt maxcpus=[SMP] Maximum number of processors that an SMP kernel will bring up during bootup. maxcpus=n : n >= 0 limits the kernel to bring up 'n' processors. Surely after bootup you can bring up the other plugged cpu by executing "echo 1 > /sys/devices/system/cpu/cpuX/online". So maxcpus only takes effect during system bootup. While n=0 is a special case, it is equivalent to "nosmp", which also disables the IO APIC. Here, as it explained, maxcpus only affects the bootup, later, extra cpus can be online. > That is documented in Documentation/admin-guide/kdump/kdump.rst. > > On arm and ppc64, 'maxcpus=1' is passed for kdump kernel, and "maxcpu=1" On aarch64 and x86, nr_cpus=1 is used, while on ppc64, due to the implementation, nr_cpus=1 can not be supported. Thanks, Pingfan > simply keep one of CPU cores as online, and others as offline. > > So Cc our arch(arm & ppc64) & kdump guys wrt. passing 'maxcpus=1' for > kdump kernel. > > > setups, otherwise you'll have to paper over it in all kind of > > drivers. > > The issue is only triggered for drivers which use managed irq & > multiple hw queues. > > > Thanks, > Ming > > > ___ > kexec mailing list > ke...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/kexec >
Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues
Hi Baoquan, On Tue, Jul 11, 2023 at 11:35:50AM +0800, Baoquan He wrote: > On 07/10/23 at 05:14pm, Ming Lei wrote: > > On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote: > > > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote: > > > > Take blk-mq's knowledge into account for calculating io queues. > > > > > > > > Fix wrong queue mapping in case of kdump kernel. > > > > > > > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see > > > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus() > > > > still returns all CPUs. > > > > > > That's simply broken. Please fix the arch code to make sure > > > it does not return a bogus num_possible_cpus value for these > > > > That is documented in Documentation/admin-guide/kdump/kdump.rst. > > > > On arm and ppc64, 'maxcpus=1' is passed for kdump kernel, and "maxcpu=1" > > simply keep one of CPU cores as online, and others as offline. > > I don't know maxcpus on arm and ppc64 well. But maxcpus=1 or nr_cpus=1 > are suggested parameter. Because usually nr_cpus=1 is enough to make > kdump kernel work well to capture vmcore. However, user is allowed to > specify nr_cpus=n (n>1) if they think multiple cpus are needed in kdump > kernel. Your hard coding of cpu number in kdump kernel may be not so > reasonable. As I mentioned, for arm/ppc64, passing 'maxcpus=1' actually follows Documentation/admin-guide/kdump/kdump.rst. 'nr_cpus=N' just works fine, so not related with this topic. After 'maxcpus=1' is passed, kernel only brings up one of cpu cores as online during booting, and others still can be put into online by userspace. Now this way causes IO timeout on some storage device which uses managed irq and supports multiple io queues. Here the focus is if passing 'maxcpus=1' is valid for kdump kernel, that is we want to hear from our arch/kdump guys. If yes, something needs to be fixed, such as, what this patchset is doing. 
> > Please cc kexec mailing list when posting so that people can view the > whole thread of discussion. Already Cc kexec & arm/powerpc & irq list. Thanks, Ming
Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues
On 07/10/23 at 05:14pm, Ming Lei wrote: > On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote: > > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote: > > > Take blk-mq's knowledge into account for calculating io queues. > > > > > > Fix wrong queue mapping in case of kdump kernel. > > > > > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see > > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus() > > > still returns all CPUs. > > > > That's simply broken. Please fix the arch code to make sure > > it does not return a bogus num_possible_cpus value for these > > That is documented in Documentation/admin-guide/kdump/kdump.rst. > > On arm and ppc64, 'maxcpus=1' is passed for kdump kernel, and "maxcpu=1" > simply keep one of CPU cores as online, and others as offline. I don't know maxcpus on arm and ppc64 well. But maxcpus=1 or nr_cpus=1 are suggested parameter. Because usually nr_cpus=1 is enough to make kdump kernel work well to capture vmcore. However, user is allowed to specify nr_cpus=n (n>1) if they think multiple cpus are needed in kdump kernel. Your hard coding of cpu number in kdump kernel may be not so reasonable. Please cc kexec mailing list when posting so that people can view the whole thread of discussion. Thanks Baoquan > > So Cc our arch(arm & ppc64) & kdump guys wrt. passing 'maxcpus=1' for > kdump kernel. > > > setups, otherwise you'll have to paper over it in all kind of > > drivers. > > The issue is only triggered for drivers which use managed irq & > multiple hw queues. > > > Thanks, > Ming > > > ___ > linux-arm-kernel mailing list > linux-arm-ker...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel >
Re: [PATCH v7 5/8] KVM: x86/mmu: Don't pass FOLL_GET to __kvm_follow_pfn
On Tue, Jul 11, 2023 at 1:34 AM Isaku Yamahata wrote: > > On Fri, Jul 07, 2023 at 10:35:02AM +0900, > David Stevens wrote: > > > > > > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > > > > > > index e44ab512c3a1..b1607e314497 100644 > > > > > > --- a/arch/x86/kvm/mmu/mmu.c > > > > > > +++ b/arch/x86/kvm/mmu/mmu.c > > > > > > > > > > ... > > > > > > > > > > > @@ -2937,6 +2943,7 @@ static int mmu_set_spte(struct kvm_vcpu > > > > > > *vcpu, struct kvm_memory_slot *slot, > > > > > > bool host_writable = !fault || fault->map_writable; > > > > > > bool prefetch = !fault || fault->prefetch; > > > > > > bool write_fault = fault && fault->write; > > > > > > + bool is_refcounted = !fault || fault->is_refcounted_page; > > > > > > > > > > Just wonder, what if a non-refcounted page is prefetched? Or is it > > > > > possible in > > > > > practice? > > > > > > > > Prefetching is still done via gfn_to_page_many_atomic, which sets > > > > FOLL_GET. That's fixable, but it's not something this series currently > > > > does. > > > > > > So if we prefetch a page, REFCOUNTED bit is cleared unconditionally with > > > this > > > hunk. kvm_set_page_{dirty, accessed} won't be called as expected for > > > prefetched > > > spte. If I read the patch correctly, REFCOUNTED bit in SPTE should > > > represent > > > whether the corresponding page is ref-countable or not, right? > > > > > > Because direct_pte_prefetch_many() is for legacy KVM MMU and > > > FNAME(prefetch_pte) > > > is shadow paging, we need to test it with legacy KVM MMU or shadow paging > > > to hit > > > the issue, though. > > > > > > > direct_pte_prefetch_many and prefetch_gpte both pass NULL for the > > fault parameter, so is_refcounted will evaluate to true. So the spte's > > refcounted bit will get set in that case. > > Oops, my bad. My point is "unconditionally". Is the bit always set for > non-refcountable pages? Or non-refcountable pages are not prefeched? 
The bit is never set for non-refcounted pages, and is always set for refcounted pages. The current series never prefetches non-refcounted pages, since it continues to use the gfn_to_page_many_atomic API. -David
[PATCH net-next v3 01/10] net: wan: Remove unnecessary (void*) conversions
From: wuych Pointer variables of void * type do not require type cast. Signed-off-by: Wu Yunchuan --- drivers/net/wan/fsl_ucc_hdlc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 47c2ad7a3e42..91e37c3dcbee 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -350,7 +350,7 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); - struct ucc_hdlc_private *priv = (struct ucc_hdlc_private *)hdlc->priv; + struct ucc_hdlc_private *priv = hdlc->priv; struct qe_bd *bd; u16 bd_status; unsigned long flags; -- 2.30.2
[powerpc:merge] BUILD SUCCESS 20125a0e8655abec375153cc23cc708ffa8c4380
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git merge branch HEAD: 20125a0e8655abec375153cc23cc708ffa8c4380 Automatic merge of 'master' into merge (2023-07-10 09:49) elapsed time: 1249m configs tested: 169 configs skipped: 9 The following configs have been built successfully. More configs may be tested in the coming days. tested configs: alphaallyesconfig gcc alpha defconfig gcc alpharandconfig-r032-20230710 gcc arc alldefconfig gcc arc allyesconfig gcc arc defconfig gcc arc randconfig-r002-20230710 gcc arc randconfig-r014-20230710 gcc arc randconfig-r025-20230710 gcc arc randconfig-r031-20230710 gcc arc randconfig-r043-20230710 gcc arm allmodconfig gcc arm allyesconfig gcc arm axm55xx_defconfig gcc arm collie_defconfig clang arm defconfig gcc arm lpc18xx_defconfig gcc arm randconfig-r012-20230710 gcc arm randconfig-r046-20230710 gcc arm socfpga_defconfig clang arm sp7021_defconfig clang armspear3xx_defconfig clang arm spitz_defconfig clang arm64alldefconfig gcc arm64allyesconfig gcc arm64 defconfig gcc cskydefconfig gcc csky randconfig-r011-20230710 gcc csky randconfig-r013-20230710 gcc hexagon randconfig-r016-20230710 clang hexagon randconfig-r036-20230710 clang hexagon randconfig-r041-20230710 clang hexagon randconfig-r045-20230710 clang i386 allyesconfig gcc i386 buildonly-randconfig-r004-20230710 gcc i386 buildonly-randconfig-r005-20230710 gcc i386 buildonly-randconfig-r006-20230710 gcc i386 debian-10.3 gcc i386defconfig gcc i386 randconfig-i001-20230710 gcc i386 randconfig-i002-20230710 gcc i386 randconfig-i003-20230710 gcc i386 randconfig-i004-20230710 gcc i386 randconfig-i005-20230710 gcc i386 randconfig-i006-20230710 gcc i386 randconfig-i011-20230710 clang i386 randconfig-i012-20230710 clang i386 randconfig-i013-20230710 clang i386 randconfig-i014-20230710 clang i386 randconfig-i015-20230710 clang i386 randconfig-i016-20230710 clang i386 randconfig-r004-20230710 gcc i386 randconfig-r032-20230710 gcc i386 
randconfig-r033-20230710 gcc loongarchallmodconfig gcc loongarch allnoconfig gcc loongarch defconfig gcc loongarchrandconfig-r002-20230710 gcc loongarchrandconfig-r035-20230710 gcc m68k allmodconfig gcc m68k allyesconfig gcc m68k amiga_defconfig gcc m68kdefconfig gcc m68km5407c3_defconfig gcc m68k randconfig-r016-20230710 gcc m68kstmark2_defconfig gcc microblaze mmu_defconfig gcc microblaze randconfig-r015-20230710 gcc mips allmodconfig gcc mips allyesconfig gcc mips ath25_defconfig clang mipsbcm47xx_defconfig gcc mips bmips_be_defconfig gcc mips bmips_stb_defconfig clang mips cavium_octeon_defconfig clang mips cobalt_defconfig gcc mips ip27_defconfig clang mips jazz_defconfig gcc mips loongson3_defconfig gcc mips randconfig-r024-20230710 gcc mips randconfig-r034-20230710 clang nios2 defconfig gcc nios2randconfig-r011-20230710 gcc nios2randconfig-r014-20230710 gcc openrisc randconfig-r015-20230710 gcc openrisc
[powerpc:fixes-test] BUILD SUCCESS cf53564b11cef5cdfafc548b172345c9aa753f89
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test branch HEAD: cf53564b11cef5cdfafc548b172345c9aa753f89 powerpc/mm/book3s64/hash/4k: Add pmd_same callback for 4K page size elapsed time: 1249m configs tested: 51 configs skipped: 179 The following configs have been built successfully. More configs may be tested in the coming days. tested configs: alphaallyesconfig gcc alpha defconfig gcc arc allyesconfig gcc arm allmodconfig gcc arm allyesconfig gcc arm collie_defconfig clang arm defconfig gcc arm64allyesconfig gcc arm64 defconfig gcc cskydefconfig gcc hexagon randconfig-r041-20230710 clang hexagon randconfig-r045-20230710 clang i386 allyesconfig gcc i386 debian-10.3 gcc i386defconfig gcc loongarchallmodconfig gcc loongarch allnoconfig gcc loongarch defconfig gcc m68k allmodconfig gcc m68k allyesconfig gcc m68kdefconfig gcc mips allmodconfig gcc mips allyesconfig gcc mips ath25_defconfig clang powerpc allmodconfig gcc powerpc allnoconfig gcc powerpccell_defconfig gcc powerpcge_imp3a_defconfig clang powerpc maple_defconfig gcc powerpc mpc832x_rdb_defconfig clang powerpc mpc83xx_defconfig gcc powerpc ppc40x_defconfig gcc powerpc randconfig-r004-20230710 gcc powerpc randconfig-r006-20230710 gcc powerpc randconfig-r026-20230710 clang powerpc sequoia_defconfig gcc powerpc skiroot_defconfig clang powerpc xes_mpc85xx_defconfig clang riscvallmodconfig gcc riscv allnoconfig gcc riscvallyesconfig gcc riscv defconfig gcc riscvrandconfig-r042-20230710 clang riscv rv32_defconfig gcc s390 alldefconfig clang s390 randconfig-r044-20230710 clang sh kfr2r09-romimage_defconfig gcc sparcallyesconfig gcc sparc defconfig gcc x86_64 kexec gcc xtensa nommu_kc705_defconfig gcc -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues
On Mon, Jul 10, 2023 at 10:51:43AM -0600, Keith Busch wrote: > On Mon, Jul 10, 2023 at 05:14:15PM +0800, Ming Lei wrote: > > On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote: > > > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote: > > > > Take blk-mq's knowledge into account for calculating io queues. > > > > > > > > Fix wrong queue mapping in case of kdump kernel. > > > > > > > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see > > > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus() > > > > still returns all CPUs. > > > > > > That's simply broken. Please fix the arch code to make sure > > > it does not return a bogus num_possible_cpus value for these > > > > That is documented in Documentation/admin-guide/kdump/kdump.rst. > > > > On arm and ppc64, 'maxcpus=1' is passed for kdump kernel, and "maxcpu=1" > > simply keep one of CPU cores as online, and others as offline. > > > > So Cc our arch(arm & ppc64) & kdump guys wrt. passing 'maxcpus=1' for > > kdump kernel. > > > > > setups, otherwise you'll have to paper over it in all kind of > > > drivers. > > > > The issue is only triggered for drivers which use managed irq & > > multiple hw queues. > > Is the problem that the managed interrupt sets the effective irq > affinity to an offline CPU? You mentioned observed timeouts; are you Yes, the problem is that blk-mq only creates hctx0, so nvme-pci translate it into hctx0's nvme_queue, this way is actually wrong, cause blk-mq's view on queue topo isn't same with nvme's view. > seeing the "completion polled" nvme message? Yes, "completion polled" can be observed. Meantime the warning in __irq_startup_managed() can be triggered from nvme_timeout()->nvme_poll_irqdisable()->enable_irq(). Thanks, Ming
Re: [PATCH 0/2] PCI/AER: Remove/unexport error reporting enable/disable
On 7/10/23 4:21 PM, Bjorn Helgaas wrote: > From: Bjorn Helgaas > > pci_disable_pcie_error_reporting() is unused; remove it. > pci_enable_pcie_error_reporting() is used only inside aer.c; make it > static. Looks fine to me. Reviewed-by: Kuppuswamy Sathyanarayanan > > Bjorn Helgaas (2): > PCI/AER: Drop unused pci_disable_pcie_error_reporting() > PCI/AER: Unexport pci_enable_pcie_error_reporting() > > drivers/pci/pcie/aer.c | 15 +-- > include/linux/aer.h| 11 --- > 2 files changed, 1 insertion(+), 25 deletions(-) > -- Sathyanarayanan Kuppuswamy Linux Kernel Developer
Re: [PATCH net-next v2 01/10] net: wan: Remove unnecessary (void*) conversions
On 2023/7/11 00:34, Andrew Lunn wrote: On Mon, Jul 10, 2023 at 02:39:33PM +0800, Su Hui wrote: From: wuych Pointer variables of void * type do not require type cast. Signed-off-by: wuych --- drivers/net/wan/fsl_ucc_hdlc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 47c2ad7a3e42..73c73d8f4bb2 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -350,11 +350,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); - struct ucc_hdlc_private *priv = (struct ucc_hdlc_private *)hdlc->priv; - struct qe_bd *bd; - u16 bd_status; + struct ucc_hdlc_private *priv = hdlc->priv; unsigned long flags; __be16 *proto_head; + struct qe_bd *bd; + u16 bd_status; When dealing with existing broken reverse Christmas tree, please don't make it worse with a change. But actually fixing it should be in a different patch. We want patches to be obviously correct. By removing the cast and moving variables around, it is less obvious it is correct, than having two patches. Got it, thanks. I will resend the v3 later which remove the change of reverse Christmas tree. So sorry for this! Wu Yunchuan Andrew
[PATCH 2/2] PCI/AER: Unexport pci_enable_pcie_error_reporting()
From: Bjorn Helgaas pci_enable_pcie_error_reporting() is used only inside aer.c. Stop exposing it outside the file. Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer.c | 3 +-- include/linux/aer.h| 6 -- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index d4c948b7c449..645149608054 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -230,7 +230,7 @@ int pcie_aer_is_native(struct pci_dev *dev) return pcie_ports_native || host->native_aer; } -int pci_enable_pcie_error_reporting(struct pci_dev *dev) +static int pci_enable_pcie_error_reporting(struct pci_dev *dev) { int rc; @@ -240,7 +240,6 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev) rc = pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); return pcibios_err_to_errno(rc); } -EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting); int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { diff --git a/include/linux/aer.h b/include/linux/aer.h index aadc9242cb20..2dd175f5debd 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -41,14 +41,8 @@ struct aer_capability_regs { }; #if defined(CONFIG_PCIEAER) -/* PCIe port driver needs this function to enable AER */ -int pci_enable_pcie_error_reporting(struct pci_dev *dev); int pci_aer_clear_nonfatal_status(struct pci_dev *dev); #else -static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) -{ - return -EINVAL; -} static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; -- 2.34.1
[PATCH 1/2] PCI/AER: Drop unused pci_disable_pcie_error_reporting()
From: Bjorn Helgaas pci_disable_pcie_error_reporting() has no callers. Remove it. Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer.c | 12 include/linux/aer.h| 5 - 2 files changed, 17 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index f6c24ded134c..d4c948b7c449 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -242,18 +242,6 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting); -int pci_disable_pcie_error_reporting(struct pci_dev *dev) -{ - int rc; - - if (!pcie_aer_is_native(dev)) - return -EIO; - - rc = pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); - return pcibios_err_to_errno(rc); -} -EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); - int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { int aer = dev->aer_cap; diff --git a/include/linux/aer.h b/include/linux/aer.h index 3a3ab05e13fd..aadc9242cb20 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -43,17 +43,12 @@ struct aer_capability_regs { #if defined(CONFIG_PCIEAER) /* PCIe port driver needs this function to enable AER */ int pci_enable_pcie_error_reporting(struct pci_dev *dev); -int pci_disable_pcie_error_reporting(struct pci_dev *dev); int pci_aer_clear_nonfatal_status(struct pci_dev *dev); #else static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) { return -EINVAL; } -static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev) -{ - return -EINVAL; -} static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; -- 2.34.1
[PATCH 0/2] PCI/AER: Remove/unexport error reporting enable/disable
From: Bjorn Helgaas pci_disable_pcie_error_reporting() is unused; remove it. pci_enable_pcie_error_reporting() is used only inside aer.c; make it static. Bjorn Helgaas (2): PCI/AER: Drop unused pci_disable_pcie_error_reporting() PCI/AER: Unexport pci_enable_pcie_error_reporting() drivers/pci/pcie/aer.c | 15 +-- include/linux/aer.h| 11 --- 2 files changed, 1 insertion(+), 25 deletions(-) -- 2.34.1
[PATCH v2] syscalls: Cleanup references to sys_lookup_dcookie()
commit 'be65de6b03aa ("fs: Remove dcookies support")' removed the syscall definition for lookup_dcookie. However, syscall tables still point to the old sys_lookup_dcookie() definition. Update syscall tables of all architectures to directly point to sys_ni_syscall() instead. Signed-off-by: Sohil Mehta Reviewed-by: Randy Dunlap Acked-by: Namhyung Kim # for perf --- v2: - Rebased to v6.5-rc1. No other dependencies. - Added acquired tags. --- arch/alpha/kernel/syscalls/syscall.tbl | 2 +- arch/arm/tools/syscall.tbl | 2 +- arch/arm64/include/asm/unistd32.h | 4 ++-- arch/ia64/kernel/syscalls/syscall.tbl | 2 +- arch/m68k/kernel/syscalls/syscall.tbl | 2 +- arch/microblaze/kernel/syscalls/syscall.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n64.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl| 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sh/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/entry/syscalls/syscall_64.tbl | 2 +- arch/xtensa/kernel/syscalls/syscall.tbl | 2 +- include/linux/compat.h | 1 - include/linux/syscalls.h| 1 - include/uapi/asm-generic/unistd.h | 2 +- kernel/sys_ni.c | 2 -- tools/include/uapi/asm-generic/unistd.h | 2 +- tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 2 +- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 +- 26 files changed, 24 insertions(+), 28 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 1f13995d00d7..1349012f5c2e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -334,7 +334,7 @@ 401common io_submit sys_io_submit 402common io_cancel sys_io_cancel 405common 
exit_group sys_exit_group -406common lookup_dcookie sys_lookup_dcookie +406common lookup_dcookie sys_ni_syscall 407common epoll_createsys_epoll_create 408common epoll_ctl sys_epoll_ctl 409common epoll_wait sys_epoll_wait diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 8ebed8a13874..cb7ea3bf18cf 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -263,7 +263,7 @@ 246common io_submit sys_io_submit 247common io_cancel sys_io_cancel 248common exit_group sys_exit_group -249common lookup_dcookie sys_lookup_dcookie +249common lookup_dcookie sys_ni_syscall 250common epoll_createsys_epoll_create 251common epoll_ctl sys_epoll_ctl sys_oabi_epoll_ctl 252common epoll_wait sys_epoll_wait diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index d952a28463e0..2d8ab890818a 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -508,8 +508,8 @@ __SYSCALL(__NR_io_submit, compat_sys_io_submit) __SYSCALL(__NR_io_cancel, sys_io_cancel) #define __NR_exit_group 248 __SYSCALL(__NR_exit_group, sys_exit_group) -#define __NR_lookup_dcookie 249 -__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie) + /* 249 was lookup_dcookie */ +__SYSCALL(249, sys_ni_syscall) #define __NR_epoll_create 250 __SYSCALL(__NR_epoll_create, sys_epoll_create) #define __NR_epoll_ctl 251 diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index f8c74ffeeefb..ac8bd817b1b9 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -222,7 +222,7 @@ 210common fadvise64 sys_fadvise64_64 211common tgkill sys_tgkill 212common exit_group sys_exit_group -213common lookup_dcookie sys_lookup_dcookie +213common lookup_dcookie sys_ni_syscall 214common io_setupsys_io_setup 215common io_destroy sys_io_destroy 216common io_geteventssys_io_getevents diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl 
index
Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime
On Mon, 2023-07-10 at 14:35 +0200, Christian Brauner wrote: > On Fri, Jul 07, 2023 at 08:42:31AM -0400, Jeff Layton wrote: > > On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote: > > > v2: > > > - prepend patches to add missing ctime updates > > > - add simple_rename_timestamp helper function > > > - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_* > > > - drop individual inode_ctime_set_{sec,nsec} helpers > > > > > > > After review by Jan and others, and Jan's ext4 rework, the diff on top > > of the series I posted a couple of days ago is below. I don't really > > want to spam everyone with another ~100 patch v3 series, but I can if > > you think that's best. > > > > Christian, what would you like me to do here? > > I picked up the series from the list and folded the fixups you posted > here into the respective fs conversion patches. I hope that helps you > avoid a resend. You should have received a separate "thank you" mail for > all of this. > > To each patch that I folded one of the fixlets from below into I added a > git note that records a link to your mail here and the respective patch > hunk from this mail that I folded into the patch. git.kernel.org will > show notes by default. For example, > https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git/commit/?h=vfs.ctime=8b0e3c2e99004609a16ba145bcbdfdddb78e220e > should show you the note I added. You can also fetch them via > git fetch $remote refs/notes/*:refs/notes/* > (You probably know that ofc but jic.) if you're interested. > > Based on v6.5-rc1 as of today. > Many thanks!!! I'll get to work rebasing the multigrain timestamp series on top of that. > Btw, both b4 and patchwork somehow treat the series in weird was. 
> IOW, based on the message id of the cover letter I was able to pull most > messages except for: > > [07/92] fs: add ctime accessors infrastructure > [08/92] fs: new helper: simple_rename_timestamp > [92/92] fs: rename i_ctime field to __i_ctime > > which I pulled in separately. Not sure what the cause of > > this is. Good to know. I ended up doing the send in two phases: one for the cover letter and infrastructure patches that went to everyone, and one for the per-subsystem patches that went to individual maintainers and lists. I suspect that screwed up the message IDs somehow. Hopefully I won't need to do a posting like that again soon, but I'll pay closer attention to the message id handling next time. Thanks again! -- Jeff Layton
Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime
On Fri, Jul 07, 2023 at 08:42:31AM -0400, Jeff Layton wrote: > On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote: > > v2: > > - prepend patches to add missing ctime updates > > - add simple_rename_timestamp helper function > > - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_* > > - drop individual inode_ctime_set_{sec,nsec} helpers > > > > After review by Jan and others, and Jan's ext4 rework, the diff on top > of the series I posted a couple of days ago is below. I don't really > want to spam everyone with another ~100 patch v3 series, but I can if > you think that's best. > > Christian, what would you like me to do here? I picked up the series from the list and folded the fixups you posted here into the respective fs conversion patches. I hope that helps you avoid a resend. You should have received a separate "thank you" mail for all of this. To each patch that I folded one of the fixlets from below into I added a git note that records a link to your mail here and the respective patch hunk from this mail that I folded into the patch. git.kernel.org will show notes by default. For example, https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git/commit/?h=vfs.ctime=8b0e3c2e99004609a16ba145bcbdfdddb78e220e should show you the note I added. You can also fetch them via git fetch $remote refs/notes/*:refs/notes/* (You probably know that ofc but jic.) if you're interested. Based on v6.5-rc1 as of today. Btw, both b4 and patchwork somehow treat the series in weird was. IOW, based on the message id of the cover letter I was able to pull most messages except for: [07/92] fs: add ctime accessors infrastructure [08/92] fs: new helper: simple_rename_timestamp [92/92] fs: rename i_ctime field to __i_ctime which I pulled in separately. Not sure what the cause of this is.
Re: [PATCH v2 00/92] fs: new accessors for inode->i_ctime
On Wed, 05 Jul 2023 14:58:09 -0400, Jeff Layton wrote: > v2: > - prepend patches to add missing ctime updates > - add simple_rename_timestamp helper function > - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_* > - drop individual inode_ctime_set_{sec,nsec} helpers > > I've been working on a patchset to change how the inode->i_ctime is > accessed in order to give us conditional, high-res timestamps for the > ctime and mtime. struct timespec64 has unused bits in it that we can use > to implement this. In order to do that however, we need to wrap all > accesses of inode->i_ctime to ensure that bits used as flags are > appropriately handled. > > [...] Applied to the vfs.ctime branch of the vfs/vfs.git tree. Patches in the vfs.ctime branch should appear in linux-next soon. Please report any outstanding bugs that were missed during review in a new review to the original patch series allowing us to drop it. It's encouraged to provide Acked-bys and Reviewed-bys even though the patch has now been applied. If possible patch trailers will be updated. Note that commit hashes shown below are subject to change due to rebase, trailer updates or similar. If in doubt, please check the listed branch. 
tree: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git branch: vfs.ctime [01/92] ibmvmc: update ctime in conjunction with mtime on write https://git.kernel.org/vfs/vfs/c/ead310563ad2 [02/92] bfs: update ctime in addition to mtime when adding entries https://git.kernel.org/vfs/vfs/c/f42faf14b838 [03/92] efivarfs: update ctime when mtime changes on a write https://git.kernel.org/vfs/vfs/c/d8d026e0d1f2 [04/92] exfat: ensure that ctime is updated whenever the mtime is https://git.kernel.org/vfs/vfs/c/d84bd8fa48d7 [05/92] apparmor: update ctime whenever the mtime changes on an inode https://git.kernel.org/vfs/vfs/c/73955caedfae [06/92] cifs: update the ctime on a partial page write https://git.kernel.org/vfs/vfs/c/c2f784379c99 [07/92] fs: add ctime accessors infrastructure https://git.kernel.org/vfs/vfs/c/64f0367de800 [08/92] fs: new helper: simple_rename_timestamp https://git.kernel.org/vfs/vfs/c/54ced54a0239 [09/92] btrfs: convert to simple_rename_timestamp https://git.kernel.org/vfs/vfs/c/218e0f662fee [10/92] ubifs: convert to simple_rename_timestamp https://git.kernel.org/vfs/vfs/c/caac4f65568d [11/92] shmem: convert to simple_rename_timestamp https://git.kernel.org/vfs/vfs/c/d3d11e9927b6 [12/92] exfat: convert to simple_rename_timestamp https://git.kernel.org/vfs/vfs/c/71534b484c63 [13/92] ntfs3: convert to simple_rename_timestamp https://git.kernel.org/vfs/vfs/c/140880821ce0 [14/92] reiserfs: convert to simple_rename_timestamp https://git.kernel.org/vfs/vfs/c/1a1a4df5e8fc [15/92] spufs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/784e5a93c9cf [16/92] s390: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/1cece1f8e5c2 [17/92] binderfs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/0bcd830a76f3 [18/92] infiniband: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/811f97f80b01 [19/92] ibm: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/b447ed7597f0 
[20/92] usb: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/2557dc7f2dde [21/92] 9p: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/4cd4b11385ef [22/92] adfs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/e257d7ade66e [23/92] affs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/770619f19a77 [24/92] afs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/758506e44668 [25/92] fs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/a0a5a9810b37 [26/92] autofs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/d7d1363cc3f6 [27/92] befs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/d6218773de2d [28/92] bfs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/368b313ac2ab [29/92] btrfs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/d3d15221956a [30/92] ceph: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/818fc6e0129a [31/92] coda: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/4e0b22fbc012 [32/92] configfs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/69c977798a6a [33/92] cramfs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/911f086eae23 [34/92] debugfs: convert to ctime accessor functions https://git.kernel.org/vfs/vfs/c/634a50390dbb [35/92] devpts: convert to ctime accessor
Re: [PATCH v2 1/2] powerpc/tpm: Create linux,sml-base/size as big endian
On Thu, 2023-06-15 at 22:37 +1000, Michael Ellerman wrote: > There's code in prom_instantiate_sml() to do a "SML handover" (Stored > Measurement Log) from OF to Linux, before Linux shuts down Open > Firmware. > > This involves creating a buffer to hold the SML, and creating two device > tree properties to record its base address and size. The kernel then > later reads those properties from the device tree to find the SML. > > When the code was initially added in commit 4a727429abec ("PPC64: Add > support for instantiating SML from Open Firmware") the powerpc kernel > was always built big endian, so the properties were created big endian > by default. > > However since then little endian support was added to powerpc, and now > the code lacks conversions to big endian when creating the properties. > > This means on little endian kernels the device tree properties are > little endian, which is contrary to the device tree spec, and in > contrast to all other device tree properties. > > To cope with that a workaround was added in tpm_read_log_of() to skip > the endian conversion if the properties were created via the SML > handover. > > A better solution is to encode the properties as big endian as they > should be, and remove the workaround. > > Typically changing the encoding of a property like this would present > problems for kexec. However the SML is not propagated across kexec, so > changing the encoding of the properties is a non-issue. > > Fixes: e46e22f12b19 ("tpm: enhance read_log_of() to support Physical TPM > event log") > Signed-off-by: Michael Ellerman > Reviewed-by: Stefan Berger > --- > arch/powerpc/kernel/prom_init.c | 8 ++-- > drivers/char/tpm/eventlog/of.c | 23 --- > 2 files changed, 10 insertions(+), 21 deletions(-) Split into two patches (producer and consumer). BR, Jarkko > > v2: Add Stefan's reviewed-by. 
> > diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c > index d464ba412084..72fe306b6820 100644 > --- a/arch/powerpc/kernel/prom_init.c > +++ b/arch/powerpc/kernel/prom_init.c > @@ -1900,6 +1900,7 @@ static void __init prom_instantiate_sml(void) > u32 entry = 0, size = 0, succ = 0; > u64 base; > __be32 val; > + __be64 val64; > > prom_debug("prom_instantiate_sml: start...\n"); > > @@ -1956,10 +1957,13 @@ static void __init prom_instantiate_sml(void) > > reserve_mem(base, size); > > + val64 = cpu_to_be64(base); > prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-base", > - , sizeof(base)); > + , sizeof(val64)); > + > + val = cpu_to_be32(size); > prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-size", > - , sizeof(size)); > + , sizeof(val)); > > prom_debug("sml base = 0x%llx\n", base); > prom_debug("sml size = 0x%x\n", size); > diff --git a/drivers/char/tpm/eventlog/of.c b/drivers/char/tpm/eventlog/of.c > index 930fe43d5daf..0bc0cb6333c6 100644 > --- a/drivers/char/tpm/eventlog/of.c > +++ b/drivers/char/tpm/eventlog/of.c > @@ -51,8 +51,8 @@ static int tpm_read_log_memory_region(struct tpm_chip *chip) > int tpm_read_log_of(struct tpm_chip *chip) > { > struct device_node *np; > - const u32 *sizep; > - const u64 *basep; > + const __be32 *sizep; > + const __be64 *basep; > struct tpm_bios_log *log; > u32 size; > u64 base; > @@ -73,23 +73,8 @@ int tpm_read_log_of(struct tpm_chip *chip) > if (sizep == NULL || basep == NULL) > return -EIO; > > - /* > - * For both vtpm/tpm, firmware has log addr and log size in big > - * endian format. But in case of vtpm, there is a method called > - * sml-handover which is run during kernel init even before > - * device tree is setup. This sml-handover function takes care > - * of endianness and writes to sml-base and sml-size in little > - * endian format. For this reason, vtpm doesn't need conversion > - * but physical tpm needs the conversion. 
> - */ > - if (of_property_match_string(np, "compatible", "IBM,vtpm") < 0 && > - of_property_match_string(np, "compatible", "IBM,vtpm20") < 0) { > - size = be32_to_cpup((__force __be32 *)sizep); > - base = be64_to_cpup((__force __be64 *)basep); > - } else { > - size = *sizep; > - base = *basep; > - } > + size = be32_to_cpup(sizep); > + base = be64_to_cpup(basep); > > if (size == 0) { > dev_warn(&chip->dev, "%s: Event log area empty\n", __func__);
[PATCH v5 21/38] powerpc: Implement the new page table range API
Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio(). Change the PG_arch_1 (aka PG_dcache_dirty) flag from being per-page to per-folio. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Mike Rapoport (IBM) Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: linuxppc-dev@lists.ozlabs.org --- arch/powerpc/include/asm/book3s/32/pgtable.h | 5 -- arch/powerpc/include/asm/book3s/64/pgtable.h | 6 +-- arch/powerpc/include/asm/book3s/pgtable.h| 11 ++--- arch/powerpc/include/asm/cacheflush.h| 14 -- arch/powerpc/include/asm/kvm_ppc.h | 10 ++-- arch/powerpc/include/asm/nohash/pgtable.h| 16 ++ arch/powerpc/include/asm/pgtable.h | 12 + arch/powerpc/mm/book3s64/hash_utils.c| 11 +++-- arch/powerpc/mm/cacheflush.c | 40 +-- arch/powerpc/mm/nohash/e500_hugetlbpage.c| 3 +- arch/powerpc/mm/pgtable.c| 51 +++- 11 files changed, 86 insertions(+), 93 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 7bf1fe7297c6..5f12b9382909 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -462,11 +462,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) pgprot_val(pgprot)); } -static inline unsigned long pte_pfn(pte_t pte) -{ - return pte_val(pte) >> PTE_RPN_SHIFT; -} - /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 4acc9690f599..c5baa3082a5a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -104,6 +104,7 @@ * and every thing below PAGE_SHIFT; */ #define PTE_RPN_MASK (((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK)) +#define PTE_RPN_SHIFT PAGE_SHIFT /* * set of bits not changed in pmd_modify. Even though we have hash specific bits * in here, on radix we expect them to be zero. 
@@ -569,11 +570,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE); } -static inline unsigned long pte_pfn(pte_t pte) -{ - return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT; -} - /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h index d18b748ea3ae..3b7bd36a2321 100644 --- a/arch/powerpc/include/asm/book3s/pgtable.h +++ b/arch/powerpc/include/asm/book3s/pgtable.h @@ -9,13 +9,6 @@ #endif #ifndef __ASSEMBLY__ -/* Insert a PTE, top-level function is out of line. It uses an inline - * low level function in the respective pgtable-* files - */ -extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, - pte_t pte); - - #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty); @@ -36,7 +29,9 @@ void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * corresponding HPTE into the hash table ahead of time, instead of * waiting for the inevitable extra hash-table miss exception. */ -static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +static inline void update_mmu_cache_range(struct vm_fault *vmf, + struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, unsigned int nr) { if (IS_ENABLED(CONFIG_PPC32) && !mmu_has_feature(MMU_FTR_HPTE_TABLE)) return; diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 7564dd4fd12b..ef7d2de33b89 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -35,13 +35,19 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) * It just marks the page as not i-cache clean. 
We do the i-cache * flush later when the page is given to a user process, if necessary. */ -static inline void flush_dcache_page(struct page *page) +static inline void flush_dcache_folio(struct folio *folio) { if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) return; /* avoid an atomic op if possible */ - if (test_bit(PG_dcache_clean, &page->flags)) - clear_bit(PG_dcache_clean, &page->flags); + if (test_bit(PG_dcache_clean, &folio->flags)) + clear_bit(PG_dcache_clean, &folio->flags); +} +#define flush_dcache_folio flush_dcache_folio + +static inline void flush_dcache_page(struct page *page) +{ + flush_dcache_folio(page_folio(page)); } void flush_icache_range(unsigned long start,
Re: [PATCH 00/17] fbdev: Remove FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT flags
Hi Thomas, On Mon, Jul 10, 2023 at 02:50:04PM +0200, Thomas Zimmermann wrote: > Remove the unused flags FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT from > fbdev and drivers, as briefly discussed at [1]. Both flags were maybe > useful when fbdev had special handling for driver modules. With > commit 376b3ff54c9a ("fbdev: Nuke FBINFO_MODULE"), they are both 0 > and have no further effect. > > Patches 1 to 7 remove FBINFO_DEFAULT from drivers. Patches 2 to 5 > split this by the way the fb_info struct is being allocated. All flags > are cleared to zero during the allocation. > > Patches 8 to 16 do the same for FBINFO_FLAG_DEFAULT. Patch 8 fixes > an actual bug in how arch/sh uses the token for struct fb_videomode, > which is unrelated. > > Patch 17 removes both flag constants from <linux/fb.h>. We have a few more flags that are unused - should they be nuked too? FBINFO_HWACCEL_FILLRECT FBINFO_HWACCEL_ROTATE FBINFO_HWACCEL_XPAN Unused as in no references from fbdev/core/* I would rather see one series nuke all unused FBINFO flags in one go. Assuming my quick grep is right and the above can be dropped. Sam
Re: [PATCH v4 20/36] powerpc: Implement the new page table range API
On Sat, Mar 18, 2023 at 09:19:04AM +0000, Christophe Leroy wrote: > void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, > pte_t pte, unsigned int nr) > { > pgprot_t prot; > unsigned long pfn; > /* >* Make sure hardware valid bit is not set. We don't do >* tlb flush for this update. >*/ > VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); > > /* Note: mm->context.id might not yet have been assigned as >* this context might not have been activated yet when this >* is called. >*/ > pte = set_pte_filter(pte); > > prot = pte_pgprot(pte); > pfn = pte_pfn(pte); > /* Perform the setting of the PTE */ > for (;;) { > __set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot), 0); > if (--nr == 0) > break; > ptep++; > pfn++; > addr += PAGE_SIZE; > } > } I'd rather the per-arch code were as similar to each other and the generic implementation as possible. Fewer bugs that way and easier for other people to make changes that have to touch every architecture in the future.
Re: [PATCH v4 08/13] powerpc/mm/trace: Convert trace event to trace event class
Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit : > A follow-up patch will add a pud variant for this same event. > Using event class makes that addition simpler. > > No functional change in this patch. > > Signed-off-by: Aneesh Kumar K.V Reviewed-by: Christophe Leroy > --- > arch/powerpc/mm/book3s64/hash_pgtable.c | 2 +- > arch/powerpc/mm/book3s64/radix_pgtable.c | 2 +- > include/trace/events/thp.h | 23 --- > 3 files changed, 18 insertions(+), 9 deletions(-) > > diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c > b/arch/powerpc/mm/book3s64/hash_pgtable.c > index 51f48984abca..988948d69bc1 100644 > --- a/arch/powerpc/mm/book3s64/hash_pgtable.c > +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c > @@ -214,7 +214,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct > *mm, unsigned long addr > > old = be64_to_cpu(old_be); > > - trace_hugepage_update(addr, old, clr, set); > + trace_hugepage_update_pmd(addr, old, clr, set); > if (old & H_PAGE_HASHPTE) > hpte_do_hugepage_flush(mm, addr, pmdp, old); > return old; > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c > b/arch/powerpc/mm/book3s64/radix_pgtable.c > index e7ea492ac510..02e185d2e4d6 100644 > --- a/arch/powerpc/mm/book3s64/radix_pgtable.c > +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c > @@ -962,7 +962,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct > *mm, unsigned long add > #endif > > old = radix__pte_update(mm, addr, pmdp_ptep(pmdp), clr, set, 1); > - trace_hugepage_update(addr, old, clr, set); > + trace_hugepage_update_pmd(addr, old, clr, set); > > return old; > } > diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h > index 202b3e3e67ff..a95c78b10561 100644 > --- a/include/trace/events/thp.h > +++ b/include/trace/events/thp.h > @@ -8,25 +8,29 @@ > #include > #include > > -TRACE_EVENT(hugepage_set_pmd, > +DECLARE_EVENT_CLASS(hugepage_set, > > - TP_PROTO(unsigned long addr, unsigned long pmd), > - TP_ARGS(addr, pmd), > + TP_PROTO(unsigned long addr, unsigned long 
pte), > + TP_ARGS(addr, pte), > TP_STRUCT__entry( > __field(unsigned long, addr) > - __field(unsigned long, pmd) > + __field(unsigned long, pte) > ), > > TP_fast_assign( > __entry->addr = addr; > - __entry->pmd = pmd; > + __entry->pte = pte; > ), > > - TP_printk("Set pmd with 0x%lx with 0x%lx", __entry->addr, > __entry->pmd) > + TP_printk("Set page table entry with 0x%lx with 0x%lx", > __entry->addr, __entry->pte) > ); > > +DEFINE_EVENT(hugepage_set, hugepage_set_pmd, > + TP_PROTO(unsigned long addr, unsigned long pmd), > + TP_ARGS(addr, pmd) > +); > > -TRACE_EVENT(hugepage_update, > +DECLARE_EVENT_CLASS(hugepage_update, > > TP_PROTO(unsigned long addr, unsigned long pte, unsigned long clr, > unsigned long set), > TP_ARGS(addr, pte, clr, set), > @@ -48,6 +52,11 @@ TRACE_EVENT(hugepage_update, > TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = > 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set) > ); > > +DEFINE_EVENT(hugepage_update, hugepage_update_pmd, > + TP_PROTO(unsigned long addr, unsigned long pmd, unsigned long clr, > unsigned long set), > + TP_ARGS(addr, pmd, clr, set) > +); > + > DECLARE_EVENT_CLASS(migration_pmd, > > TP_PROTO(unsigned long addr, unsigned long pmd),
Re: [PATCH v4 06/13] mm/huge pud: Use transparent huge pud helpers only with CONFIG_TRANSPARENT_HUGEPAGE
Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit : > pudp_set_wrprotect and move_huge_pud helpers are only used when > CONFIG_TRANSPARENT_HUGEPAGE is enabled. Similar to pmdp_set_wrprotect and > move_huge_pmd_helpers use architecture override only if > CONFIG_TRANSPARENT_HUGEPAGE is set > > Signed-off-by: Aneesh Kumar K.V Reviewed-by: Christophe Leroy > --- > include/linux/pgtable.h | 2 ++ > mm/mremap.c | 2 +- > 2 files changed, 3 insertions(+), 1 deletion(-) > > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index 91def34f7784..b5af3e014606 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -558,6 +558,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct > *mm, > #endif > #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT > #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE > static inline void pudp_set_wrprotect(struct mm_struct *mm, > unsigned long address, pud_t *pudp) > { > @@ -571,6 +572,7 @@ static inline void pudp_set_wrprotect(struct mm_struct > *mm, > { > BUILD_BUG(); > } > +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ > #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ > #endif > > diff --git a/mm/mremap.c b/mm/mremap.c > index 11e06e4ab33b..056478c106ee 100644 > --- a/mm/mremap.c > +++ b/mm/mremap.c > @@ -349,7 +349,7 @@ static inline bool move_normal_pud(struct vm_area_struct > *vma, > } > #endif > > -#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD > +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && > defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) > static bool move_huge_pud(struct vm_area_struct *vma, unsigned long > old_addr, > unsigned long new_addr, pud_t *old_pud, pud_t > *new_pud) > {
Re: [PATCH v4 05/13] mm: Add __HAVE_ARCH_PUD_SAME similar to __HAVE_ARCH_P4D_SAME
Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit : > This helps architectures to override pmd_same and pud_same independently. > > Signed-off-by: Aneesh Kumar K.V Reviewed-by: Christophe Leroy Shouldn't you do it the modern way and use #ifndef pud_same instead of a new __HAVE_ARCH_PUD_SAME like in the old days ? > --- > include/linux/pgtable.h | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index 6fd9b2831338..91def34f7784 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -693,7 +693,9 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) > { > return pmd_val(pmd_a) == pmd_val(pmd_b); > } > +#endif > > +#ifndef __HAVE_ARCH_PUD_SAME > static inline int pud_same(pud_t pud_a, pud_t pud_b) > { > return pud_val(pud_a) == pud_val(pud_b);
Re: [PATCH v4 04/13] mm/vmemmap: Allow architectures to override how vmemmap optimization works
Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit : > Architectures like powerpc would like to use different page table allocators > and mapping mechanisms to implement vmemmap optimization. Similar to > vmemmap_populate allow architectures to implement > vmemmap_populate_compound_pages > > Signed-off-by: Aneesh Kumar K.V > --- > mm/sparse-vmemmap.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c > index a044a130405b..541b3f69a481 100644 > --- a/mm/sparse-vmemmap.c > +++ b/mm/sparse-vmemmap.c > @@ -141,6 +141,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node, > start, end - 1); > } > > +#ifndef vmemmap_populate_compound_pages > pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int > node, > struct vmem_altmap *altmap, > struct page *reuse) Should vmemmap_pte_populate() be static ? It looks odd to exclude a non-static function based on an unrelated macro. There are several such functions in the block being excluded here. Can you explain why it is correct to do that ? > @@ -446,6 +447,8 @@ static int __meminit > vmemmap_populate_compound_pages(unsigned long start_pfn, > return 0; > } > > +#endif > + > struct page * __meminit __populate_section_memmap(unsigned long pfn, > unsigned long nr_pages, int nid, struct vmem_altmap *altmap, > struct dev_pagemap *pgmap)
Re: [PATCH v2 07/12] s390: add pte_free_defer() for pgtables sharing page
On Wed, Jul 05, 2023 at 02:55:16PM +0200, Gerald Schaefer wrote: > Ah ok, I was aware of that "semi-RCU" fallback logic in tlb_remove_table(), > but that is rather a generic issue, and not s390-specific. I thought you > meant some s390-oddity here, of which we have a lot, unfortunately... > Of course, we call tlb_remove_table() from our page_table_free_rcu(), so > I guess you could say that page_table_free_rcu() cannot guarantee what > tlb_remove_table() cannot guarantee. The issue is the arches don't provide a reliable way to RCU free things, so the core code creates an RCU situation using the MMU batch. With the non-RCU compatible IPI fallback. So it isn't actually RCU, it is IPI but optimized with RCU in some cases. When Hugh introduces a reliable way to RCU free stuff we could fall back to that in the TLB code instead of invoking the synchronize_rcu(). For lots of arches, S390 included after this series, this would be pretty easy. What I see now as the big trouble is that this series only addresses PTE RCU'ness and making all the other levels RCUable would be much harder on some arches like power. In short we could create a CONFIG_ARCH_RCU_SAFE_PAGEWALK and it could be done on a lot of arches quite simply, but at least not power. Which makes me wonder about the value, but maybe it could shame power into doing something.. However, calling things 'page_table_free_rcu()' when it doesn't actually always do RCU but IPI optimized RCU is an unfortunate name :( As long as you never assume it does RCU anywhere else, and don't use rcu_read_lock(), it is fine :) The corner case is narrow, you have to OOM the TLB batching before you lose the RCU optimization of the IPI. Then you can notice that rcu_read_lock() doesn't actually protect against concurrent free. Jason
Re: [PATCH v4 03/13] mm/vmemmap: Improve vmemmap_can_optimize and allow architectures to override
Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit : > dax vmemmap optimization requires a minimum of 2 PAGE_SIZE area within > vmemmap such that tail page mapping can point to the second PAGE_SIZE area. > Enforce that in vmemmap_can_optimize() function. > > Architectures like powerpc also want to enable vmemmap optimization > conditionally (only with radix MMU translation). Hence allow architecture > override. > > Signed-off-by: Aneesh Kumar K.V Reviewed-by: Christophe Leroy Why renaming vmemmap_can_optimize() to __vmemmap_can_optimize() and keep it when vmemmap_can_optimize() has been override ? Is that because you expect overriding version of vmemmap_can_optimize() to call __vmemmap_can_optimize() ? > --- > include/linux/mm.h | 27 +++ > mm/mm_init.c | 2 +- > 2 files changed, 24 insertions(+), 5 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 2dd73e4f3d8e..1a2234ee14d2 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -3639,13 +3639,32 @@ void vmemmap_free(unsigned long start, unsigned long > end, > struct vmem_altmap *altmap); > #endif > > +#define VMEMMAP_RESERVE_NR 2 > #ifdef CONFIG_ARCH_WANT_OPTIMIZE_VMEMMAP > -static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, > -struct dev_pagemap *pgmap) > +static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap, > + struct dev_pagemap *pgmap) > { > - return is_power_of_2(sizeof(struct page)) && > - pgmap && (pgmap_vmemmap_nr(pgmap) > 1) && !altmap; > + unsigned long nr_pages; > + unsigned long nr_vmemmap_pages; > + > + if (!pgmap || !is_power_of_2(sizeof(struct page))) > + return false; > + > + nr_pages = pgmap_vmemmap_nr(pgmap); > + nr_vmemmap_pages = ((nr_pages * sizeof(struct page)) >> PAGE_SHIFT); > + /* > + * For vmemmap optimization with DAX we need minimum 2 vmemmap > + * pages. 
See layout diagram in Documentation/mm/vmemmap_dedup.rst > + */ > + return !altmap && (nr_vmemmap_pages > VMEMMAP_RESERVE_NR); > } > +/* > + * If we don't have an architecture override, use the generic rule > + */ > +#ifndef vmemmap_can_optimize > +#define vmemmap_can_optimize __vmemmap_can_optimize > +#endif > + > #else > static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, > struct dev_pagemap *pgmap) > diff --git a/mm/mm_init.c b/mm/mm_init.c > index a1963c3322af..245ac69b66a5 100644 > --- a/mm/mm_init.c > +++ b/mm/mm_init.c > @@ -1020,7 +1020,7 @@ static inline unsigned long compound_nr_pages(struct > vmem_altmap *altmap, > if (!vmemmap_can_optimize(altmap, pgmap)) > return pgmap_vmemmap_nr(pgmap); > > - return 2 * (PAGE_SIZE / sizeof(struct page)); > + return VMEMMAP_RESERVE_NR * (PAGE_SIZE / sizeof(struct page)); > } > > static void __ref memmap_init_compound(struct page *head,
Re: [PATCH v4 02/13] mm: Change pudp_huge_get_and_clear_full take vm_area_struct as arg
Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit : > We will use this in a later patch to do tlb flush when clearing pud entries > on powerpc. This is similar to commit 93a98695f2f9 ("mm: change > pmdp_huge_get_and_clear_full take vm_area_struct as arg") > > Signed-off-by: Aneesh Kumar K.V Reviewed-by: Christophe Leroy > --- > include/linux/pgtable.h | 4 ++-- > mm/debug_vm_pgtable.c | 2 +- > mm/huge_memory.c| 2 +- > 3 files changed, 4 insertions(+), 4 deletions(-) > > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index cf13f8d938a8..6fd9b2831338 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -450,11 +450,11 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct > vm_area_struct *vma, > #endif > > #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL > -static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm, > +static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma, > unsigned long address, pud_t *pudp, > int full) > { > - return pudp_huge_get_and_clear(mm, address, pudp); > + return pudp_huge_get_and_clear(vma->vm_mm, address, pudp); > } > #endif > #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ > diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c > index ee119e33fef1..ee2c4c1dcfc8 100644 > --- a/mm/debug_vm_pgtable.c > +++ b/mm/debug_vm_pgtable.c > @@ -385,7 +385,7 @@ static void __init pud_advanced_tests(struct > pgtable_debug_args *args) > WARN_ON(!(pud_write(pud) && pud_dirty(pud))); > > #ifndef __PAGETABLE_PMD_FOLDED > - pudp_huge_get_and_clear_full(args->mm, vaddr, args->pudp, 1); > + pudp_huge_get_and_clear_full(args->vma, vaddr, args->pudp, 1); > pud = READ_ONCE(*args->pudp); > WARN_ON(!pud_none(pud)); > #endif /* __PAGETABLE_PMD_FOLDED */ > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index eb3678360b97..ba20cef681a4 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -1981,7 +1981,7 @@ int zap_huge_pud(struct mmu_gather *tlb, struct > vm_area_struct *vma, > 
if (!ptl) > return 0; > > - pudp_huge_get_and_clear_full(tlb->mm, addr, pud, tlb->fullmm); > + pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm); > tlb_remove_pud_tlb_entry(tlb, pud, addr); > if (vma_is_special_huge(vma)) { > spin_unlock(ptl);
Re: [PATCH v4 01/13] mm/hugepage pud: Allow arch-specific helper function to check huge page pud support
Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit : > Architectures like powerpc would like to enable transparent huge page pud > support only with radix translation. To support that add > has_transparent_pud_hugepage() helper that architectures can override. > > Signed-off-by: Aneesh Kumar K.V Reviewed-by: Christophe Leroy > --- > drivers/nvdimm/pfn_devs.c | 2 +- > include/linux/pgtable.h | 3 +++ > 2 files changed, 4 insertions(+), 1 deletion(-) > > diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c > index af7d9301520c..18ad315581ca 100644 > --- a/drivers/nvdimm/pfn_devs.c > +++ b/drivers/nvdimm/pfn_devs.c > @@ -100,7 +100,7 @@ static unsigned long > *nd_pfn_supported_alignments(unsigned long *alignments) > > if (has_transparent_hugepage()) { > alignments[1] = HPAGE_PMD_SIZE; > - if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) > + if (has_transparent_pud_hugepage()) > alignments[2] = HPAGE_PUD_SIZE; > } > > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index 5063b482e34f..cf13f8d938a8 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -1499,6 +1499,9 @@ typedef unsigned int pgtbl_mod_mask; > #define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE) > #endif > > +#ifndef has_transparent_pud_hugepage > +#define has_transparent_pud_hugepage() > IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) > +#endif > /* >* On some architectures it depends on the mm if the p4d/pud or pmd >* layer of the page table hierarchy is folded or not.
[PATCH 2/2] powerpc/crypto: don't build aes-gcm-p10 by default
From: Omar Sandoval None of the other accelerated crypto modules are built by default. Signed-off-by: Omar Sandoval --- arch/powerpc/crypto/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index 81ae015861c0..97802c72317c 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -101,7 +101,6 @@ config CRYPTO_AES_GCM_P10 select CRYPTO_ALGAPI select CRYPTO_AEAD select CRYPTO_SKCIPHER - default m help AEAD cipher: AES cipher algorithms (FIPS-197) GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D) -- 2.41.0
Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues
On Mon, Jul 10, 2023 at 05:14:15PM +0800, Ming Lei wrote: > On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote: > > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote: > > > Take blk-mq's knowledge into account for calculating io queues. > > > > > > Fix wrong queue mapping in case of kdump kernel. > > > > > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see > > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus() > > > still returns all CPUs. > > > > That's simply broken. Please fix the arch code to make sure > > it does not return a bogus num_possible_cpus value for these > > That is documented in Documentation/admin-guide/kdump/kdump.rst. > > On arm and ppc64, 'maxcpus=1' is passed for kdump kernel, and "maxcpu=1" > simply keep one of CPU cores as online, and others as offline. > > So Cc our arch(arm & ppc64) & kdump guys wrt. passing 'maxcpus=1' for > kdump kernel. > > > setups, otherwise you'll have to paper over it in all kind of > > drivers. > > The issue is only triggered for drivers which use managed irq & > multiple hw queues. Is the problem that the managed interrupt sets the effective irq affinity to an offline CPU? You mentioned observed timeouts; are you seeing the "completion polled" nvme message?
RE: [PATCH] soc: fsl: qe: Replace all non-returning strlcpy with strscpy
> -Original Message- > From: Azeem Shaikh > Sent: Sunday, July 9, 2023 9:36 PM > To: Kees Cook > Cc: Qiang Zhao ; linux-harden...@vger.kernel.org; > linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org; Leo Li > ; linux-arm-ker...@lists.infradead.org > Subject: Re: [PATCH] soc: fsl: qe: Replace all non-returning strlcpy with > strscpy > > On Tue, May 23, 2023 at 1:20 PM Kees Cook > wrote: > > > > On Tue, May 23, 2023 at 02:14:25AM +, Azeem Shaikh wrote: > > > strlcpy() reads the entire source buffer first. > > > This read may exceed the destination size limit. > > > This is both inefficient and can lead to linear read overflows if a > > > source string is not NUL-terminated [1]. > > > In an effort to remove strlcpy() completely [2], replace > > > strlcpy() here with strscpy(). > > > No return values were used, so direct replacement is safe. > > > > > > [1] > > > > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fww > > > > w.kernel.org%2Fdoc%2Fhtml%2Flatest%2Fprocess%2Fdeprecated.html%23s > tr > > > > lcpy=05%7C01%7Cleoyang.li%40nxp.com%7C11f9df1df1b5440e4ec108 > db8 > > > > 0ee64de%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C63824553360 > 3780 > > > > 889%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2lu > MzIiLCJB > > > > TiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C=jcTy3IF37wqC1 > MWsSuF > > > %2F51Z1trQEMaow7BHkPSh3hzI%3D=0 > > > [2] > > > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgi > > > > thub.com%2FKSPP%2Flinux%2Fissues%2F89=05%7C01%7Cleoyang.li% > 40nx > > > > p.com%7C11f9df1df1b5440e4ec108db80ee64de%7C686ea1d3bc2b4c6fa92cd > 99c5 > > > > c301635%7C0%7C0%7C638245533603780889%7CUnknown%7CTWFpbGZsb3d > 8eyJWIjo > > > > iMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C30 > 00%7 > > > > C%7C%7C=Blr0W1oYPIw5uDu7HqlEkU7xOuAo4bQNkk%2Bt%2BAuFqc > s%3D > > > erved=0 > > > > > > Signed-off-by: Azeem Shaikh > > > > Reviewed-by: Kees Cook > > > > Friendly ping on this. Sorry for the late response. 
However, I found some older discussions which concluded that existing users should not be converted. Has that position changed since then? https://lwn.net/Articles/659214/ Regards, Leo
[PATCH 1/2] powerpc/crypto: fix missing skcipher dependency for aes-gcm-p10
From: Omar Sandoval My stripped down configuration fails to build with: ERROR: modpost: "skcipher_walk_aead_encrypt" [arch/powerpc/crypto/aes-gcm-p10-crypto.ko] undefined! ERROR: modpost: "skcipher_walk_done" [arch/powerpc/crypto/aes-gcm-p10-crypto.ko] undefined! ERROR: modpost: "skcipher_walk_aead_decrypt" [arch/powerpc/crypto/aes-gcm-p10-crypto.ko] undefined! Fix it by selecting CRYPTO_SKCIPHER. Signed-off-by: Omar Sandoval --- arch/powerpc/crypto/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index ad1872518992..81ae015861c0 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -100,6 +100,7 @@ config CRYPTO_AES_GCM_P10 select CRYPTO_LIB_AES select CRYPTO_ALGAPI select CRYPTO_AEAD + select CRYPTO_SKCIPHER default m help AEAD cipher: AES cipher algorithms (FIPS-197) -- 2.41.0
[PATCH v4 13/13] powerpc/book3s64/radix: Add debug message to give more details of vmemmap allocation
Add some extra vmemmap pr_debug message that will indicate the type of vmemmap allocations. For ex: with DAX vmemmap optimization we can find the below details: [ 187.166580] radix-mmu: PAGE_SIZE vmemmap mapping [ 187.166587] radix-mmu: PAGE_SIZE vmemmap mapping [ 187.166591] radix-mmu: Tail page reuse vmemmap mapping [ 187.166594] radix-mmu: Tail page reuse vmemmap mapping [ 187.166598] radix-mmu: Tail page reuse vmemmap mapping [ 187.166601] radix-mmu: Tail page reuse vmemmap mapping [ 187.166604] radix-mmu: Tail page reuse vmemmap mapping [ 187.166608] radix-mmu: Tail page reuse vmemmap mapping [ 187.166611] radix-mmu: Tail page reuse vmemmap mapping [ 187.166614] radix-mmu: Tail page reuse vmemmap mapping [ 187.166617] radix-mmu: Tail page reuse vmemmap mapping [ 187.166620] radix-mmu: Tail page reuse vmemmap mapping [ 187.166623] radix-mmu: Tail page reuse vmemmap mapping [ 187.166626] radix-mmu: Tail page reuse vmemmap mapping [ 187.166629] radix-mmu: Tail page reuse vmemmap mapping [ 187.166632] radix-mmu: Tail page reuse vmemmap mapping And without vmemmap optimization [ 293.549931] radix-mmu: PMD_SIZE vmemmap mapping [ 293.549984] radix-mmu: PMD_SIZE vmemmap mapping [ 293.550032] radix-mmu: PMD_SIZE vmemmap mapping [ 293.550076] radix-mmu: PMD_SIZE vmemmap mapping [ 293.550117] radix-mmu: PMD_SIZE vmemmap mapping Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/mm/book3s64/radix_pgtable.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 73d0987369ff..2828e7e0802c 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1034,6 +1034,7 @@ static pte_t * __meminit radix__vmemmap_pte_populate(pmd_t *pmdp, unsigned long p = vmemmap_alloc_block_buf(PAGE_SIZE, node, NULL); if (!p) return NULL; + pr_debug("PAGE_SIZE vmemmap mapping\n"); } else { /* * When a PTE/PMD entry is freed from the init_mm @@ -1046,6 +1047,7 @@ static 
pte_t * __meminit radix__vmemmap_pte_populate(pmd_t *pmdp, unsigned long */ get_page(reuse); p = page_to_virt(reuse); + pr_debug("Tail page reuse vmemmap mapping\n"); } VM_BUG_ON(!PAGE_ALIGNED(addr)); @@ -1155,6 +1157,7 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); if (p) { vmemmap_set_pmd(pmd, p, node, addr, next); + pr_debug("PMD_SIZE vmemmap mapping\n"); continue; } else if (altmap) { /* -- 2.41.0
Re: [PATCH v7 5/8] KVM: x86/mmu: Don't pass FOLL_GET to __kvm_follow_pfn
On Fri, Jul 07, 2023 at 10:35:02AM +0900, David Stevens wrote: > > > > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > > > > > index e44ab512c3a1..b1607e314497 100644 > > > > > --- a/arch/x86/kvm/mmu/mmu.c > > > > > +++ b/arch/x86/kvm/mmu/mmu.c > > > > > > > > ... > > > > > > > > > @@ -2937,6 +2943,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, > > > > > struct kvm_memory_slot *slot, > > > > > bool host_writable = !fault || fault->map_writable; > > > > > bool prefetch = !fault || fault->prefetch; > > > > > bool write_fault = fault && fault->write; > > > > > + bool is_refcounted = !fault || fault->is_refcounted_page; > > > > > > > > Just wonder, what if a non-refcounted page is prefetched? Or is it > > > > possible in > > > > practice? > > > > > > Prefetching is still done via gfn_to_page_many_atomic, which sets > > > FOLL_GET. That's fixable, but it's not something this series currently > > > does. > > > > So if we prefetch a page, REFCOUNTED bit is cleared unconditionally with > > this > > hunk. kvm_set_page_{dirty, accessed} won't be called as expected for > > prefetched > > spte. If I read the patch correctly, REFCOUNTED bit in SPTE should > > represent > > whether the corresponding page is ref-countable or not, right? > > > > Because direct_pte_prefetch_many() is for legacy KVM MMU and > > FNAME(prefetch_pte) > > is shadow paging, we need to test it with legacy KVM MMU or shadow paging > > to hit > > the issue, though. > > > > direct_pte_prefetch_many and prefetch_gpte both pass NULL for the > fault parameter, so is_refcounted will evaluate to true. So the spte's > refcounted bit will get set in that case. Oops, my bad. My point is "unconditionally". Is the bit always set for non-refcountable pages? Or are non-refcountable pages never prefetched? -- Isaku Yamahata
Re: [PATCH net-next v2 01/10] net: wan: Remove unnecessary (void*) conversions
On Mon, Jul 10, 2023 at 02:39:33PM +0800, Su Hui wrote: > From: wuych > > Pointer variables of void * type do not require type cast. > > Signed-off-by: wuych > --- > drivers/net/wan/fsl_ucc_hdlc.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c > index 47c2ad7a3e42..73c73d8f4bb2 100644 > --- a/drivers/net/wan/fsl_ucc_hdlc.c > +++ b/drivers/net/wan/fsl_ucc_hdlc.c > @@ -350,11 +350,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) > static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev) > { > hdlc_device *hdlc = dev_to_hdlc(dev); > - struct ucc_hdlc_private *priv = (struct ucc_hdlc_private *)hdlc->priv; > - struct qe_bd *bd; > - u16 bd_status; > + struct ucc_hdlc_private *priv = hdlc->priv; > unsigned long flags; > __be16 *proto_head; > + struct qe_bd *bd; > + u16 bd_status; When dealing with existing broken reverse Christmas tree, please don't make it worse with a change. But actually fixing it should be in a different patch. We want patches to be obviously correct. By removing the cast and moving variables around, it is less obvious it is correct, than having two patches. Andrew
Re: [PATCH 09/17] auxdisplay: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
On Mon, Jul 10, 2023 at 5:22 PM Thomas Zimmermann wrote: > > I'll append a patch to the series that documents this. > > Sure. Thanks! If you are planning to take it into some other tree: Acked-by: Miguel Ojeda Otherwise, I can take it into the `auxdisplay` tree. Cheers, Miguel
[PATCH v4 12/13] powerpc/book3s64/radix: Remove mmu_vmemmap_psize
This is not used by radix anymore. Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/mm/book3s64/radix_pgtable.c | 11 --- arch/powerpc/mm/init_64.c| 21 ++--- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index b492b67c0b7d..73d0987369ff 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -601,17 +601,6 @@ void __init radix__early_init_mmu(void) #else mmu_virtual_psize = MMU_PAGE_4K; #endif - -#ifdef CONFIG_SPARSEMEM_VMEMMAP - /* vmemmap mapping */ - if (mmu_psize_defs[MMU_PAGE_2M].shift) { - /* -* map vmemmap using 2M if available -*/ - mmu_vmemmap_psize = MMU_PAGE_2M; - } else - mmu_vmemmap_psize = mmu_virtual_psize; -#endif #endif /* * initialize page table size diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 5701faca39ef..6db7a063ba63 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -198,17 +198,12 @@ bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start, return false; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, - struct vmem_altmap *altmap) +int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int node, +struct vmem_altmap *altmap) { bool altmap_alloc; unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; -#ifdef CONFIG_PPC_BOOK3S_64 - if (radix_enabled()) - return radix__vmemmap_populate(start, end, node, altmap); -#endif - /* Align to the page size of the linear mapping. 
*/ start = ALIGN_DOWN(start, page_size); @@ -277,6 +272,18 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return 0; } +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) +{ + +#ifdef CONFIG_PPC_BOOK3S_64 + if (radix_enabled()) + return radix__vmemmap_populate(start, end, node, altmap); +#endif + + return __vmemmap_populate(start, end, node, altmap); +} + #ifdef CONFIG_MEMORY_HOTPLUG static unsigned long vmemmap_list_free(unsigned long start) { -- 2.41.0
[PATCH v4 11/13] powerpc/book3s64/radix: Add support for vmemmap optimization for radix
With 2M PMD-level mapping, we require 32 struct pages and a single vmemmap page can contain 1024 struct pages (PAGE_SIZE/sizeof(struct page)). Hence with 64K page size, we don't use vmemmap deduplication for PMD-level mapping. Signed-off-by: Aneesh Kumar K.V --- Documentation/mm/vmemmap_dedup.rst | 1 + Documentation/powerpc/index.rst| 1 + Documentation/powerpc/vmemmap_dedup.rst| 101 ++ arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/book3s/64/radix.h | 9 + arch/powerpc/mm/book3s64/radix_pgtable.c | 203 + 6 files changed, 316 insertions(+) create mode 100644 Documentation/powerpc/vmemmap_dedup.rst diff --git a/Documentation/mm/vmemmap_dedup.rst b/Documentation/mm/vmemmap_dedup.rst index a4b12ff906c4..c573e08b5043 100644 --- a/Documentation/mm/vmemmap_dedup.rst +++ b/Documentation/mm/vmemmap_dedup.rst @@ -210,6 +210,7 @@ the device (altmap). The following page sizes are supported in DAX: PAGE_SIZE (4K on x86_64), PMD_SIZE (2M on x86_64) and PUD_SIZE (1G on x86_64). +For powerpc equivalent details see Documentation/powerpc/vmemmap_dedup.rst The differences with HugeTLB are relatively minor. diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst index d33b554ca7ba..a50834798454 100644 --- a/Documentation/powerpc/index.rst +++ b/Documentation/powerpc/index.rst @@ -36,6 +36,7 @@ powerpc ultravisor vas-api vcpudispatch_stats +vmemmap_dedup features diff --git a/Documentation/powerpc/vmemmap_dedup.rst b/Documentation/powerpc/vmemmap_dedup.rst new file mode 100644 index ..dc4db59fdf87 --- /dev/null +++ b/Documentation/powerpc/vmemmap_dedup.rst @@ -0,0 +1,101 @@ +.. SPDX-License-Identifier: GPL-2.0 + +== +Device DAX +== + +The device-dax interface uses the tail deduplication technique explained in +Documentation/mm/vmemmap_dedup.rst + +On powerpc, vmemmap deduplication is only used with radix MMU translation. Also +with a 64K page size, only the devdax namespace with 1G alignment uses vmemmap +deduplication. 
+ +With 2M PMD level mapping, we require 32 struct pages and a single 64K vmemmap +page can contain 1024 struct pages (64K/sizeof(struct page)). Hence there is no +vmemmap deduplication possible. + +With 1G PUD level mapping, we require 16384 struct pages and a single 64K +vmemmap page can contain 1024 struct pages (64K/sizeof(struct page)). Hence we +require 16 64K pages in vmemmap to map the struct page for 1G PUD level mapping. + +Here's how things look like on device-dax after the sections are populated:: + +---+ ---virt_to_page---> +---+ mapping to +---+ + | | | 0 | -> | 0 | + | | +---++---+ + | | | 1 | -> | 1 | + | | +---++---+ + | | | 2 | ^ ^ ^ ^ ^ ^ + | | +---+ | | | | | + | | | 3 | --+ | | | | + | | +---+ | | | | + | | | 4 | + | | | + |PUD| +---+ | | | + | level | | . | --+ | | + | mapping | +---+ | | + | | | . | + | + | | +---+ | + | | | 15| --+ + | | +---+ + | | + | | + | | + +---+ + + +With 4K page size, 2M PMD level mapping requires 512 struct pages and a single +4K vmemmap page contains 64 struct pages(4K/sizeof(struct page)). Hence we +require 8 4K pages in vmemmap to map the struct page for 2M pmd level mapping. + +Here's how things look like on device-dax after the sections are populated:: + + +---+ ---virt_to_page---> +---+ mapping to +---+ + | | | 0 | -> | 0 | + | | +---++---+ + | | | 1 | -> | 1 | + | | +---++---+ + | | | 2 | ^ ^ ^ ^ ^ ^ + | | +---+ | | | | | + | | | 3 | --+ | | | | + | |
[PATCH v4 10/13] powerpc/book3s64/vmemmap: Switch radix to use a different vmemmap handling function
This is in preparation to update radix to implement vmemmap optimization for devdax. Below are the rules w.r.t radix vmemmap mapping 1. First try to map things using PMD (2M) 2. With altmap if altmap cross-boundary check returns true, fall back to PAGE_SIZE 3. If we can't allocate PMD_SIZE backing memory for vmemmap, fallback to PAGE_SIZE On removing vmemmap mapping, check if every subsection that is using the vmemmap area is invalid. If found to be invalid, that implies we can safely free the vmemmap area. We don't use the PAGE_UNUSED pattern used by x86 because with 64K page size, we need to do the above check even at the PAGE_SIZE granularity. Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/include/asm/book3s/64/radix.h | 2 + arch/powerpc/include/asm/pgtable.h | 4 + arch/powerpc/mm/book3s64/radix_pgtable.c | 326 +++-- arch/powerpc/mm/init_64.c | 26 +- 4 files changed, 327 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 2ef92f36340f..f1461289643a 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -331,6 +331,8 @@ extern int __meminit radix__vmemmap_create_mapping(unsigned long start, unsigned long phys); int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); +void __ref radix__vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap); extern void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 6a88bfdaa69b..68817ea7f994 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -165,6 +165,10 @@ static inline bool is_ioremap_addr(const void *x) return addr >= IOREMAP_BASE && addr < IOREMAP_END; } + +int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size); 
+bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start, + unsigned long page_size); #endif /* CONFIG_PPC64 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 227fea53c217..9a7f3707b6fb 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -744,8 +744,59 @@ static void free_pud_table(pud_t *pud_start, p4d_t *p4d) p4d_clear(p4d); } +#ifdef CONFIG_SPARSEMEM_VMEMMAP +static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end) +{ + unsigned long start = ALIGN_DOWN(addr, PMD_SIZE); + + return !vmemmap_populated(start, PMD_SIZE); +} + +static bool __meminit vmemmap_page_is_unused(unsigned long addr, unsigned long end) +{ + unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); + + return !vmemmap_populated(start, PAGE_SIZE); + +} +#endif + +static void __meminit free_vmemmap_pages(struct page *page, +struct vmem_altmap *altmap, +int order) +{ + unsigned int nr_pages = 1 << order; + + if (altmap) { + unsigned long alt_start, alt_end; + unsigned long base_pfn = page_to_pfn(page); + + /* +* with 2M vmemmap mmaping we can have things setup +* such that even though atlmap is specified we never +* used altmap. 
+*/ + alt_start = altmap->base_pfn; + alt_end = altmap->base_pfn + altmap->reserve + + altmap->free + altmap->alloc + altmap->align; + + if (base_pfn >= alt_start && base_pfn < alt_end) { + vmem_altmap_free(altmap, nr_pages); + return; + } + } + + if (PageReserved(page)) { + /* allocated from memblock */ + while (nr_pages--) + free_reserved_page(page++); + } else + free_pages((unsigned long)page_address(page), order); +} + static void remove_pte_table(pte_t *pte_start, unsigned long addr, -unsigned long end, bool direct) +unsigned long end, bool direct, +struct vmem_altmap *altmap) { unsigned long next, pages = 0; pte_t *pte; @@ -759,24 +810,26 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr, if (!pte_present(*pte)) continue; - if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) { - /* -
[PATCH v4 09/13] powerpc/book3s64/mm: Enable transparent pud hugepage
This is enabled only with radix translation and 1G hugepage size. This will be used with devdax device memory with a namespace alignment of 1G. Anon transparent hugepage is not supported even though we do have helpers checking pud_trans_huge(). We should never find that return true. The only expected pte bit combination is _PAGE_PTE | _PAGE_DEVMAP. Some of the helpers are never expected to get called on hash translation and hence is marked to call BUG() in such a case. Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/include/asm/book3s/64/hash.h | 9 + arch/powerpc/include/asm/book3s/64/pgtable.h | 155 -- arch/powerpc/include/asm/book3s/64/radix.h| 36 .../include/asm/book3s/64/tlbflush-radix.h| 2 + arch/powerpc/include/asm/book3s/64/tlbflush.h | 8 + arch/powerpc/mm/book3s64/pgtable.c| 78 + arch/powerpc/mm/book3s64/radix_pgtable.c | 28 arch/powerpc/mm/book3s64/radix_tlb.c | 7 + arch/powerpc/platforms/Kconfig.cputype| 1 + include/trace/events/thp.h| 10 ++ 10 files changed, 323 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index d4a19e6547ac..6e70ae511631 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -138,7 +138,16 @@ static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b) } #definehash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) + +/* + * pud comparison that will work with both pte and page table pointer. 
+ */ +static inline int hash__pud_same(pud_t pud_a, pud_t pud_b) +{ + return (((pud_raw(pud_a) ^ pud_raw(pud_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0); +} #definehash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS) + static inline int hash__p4d_bad(p4d_t p4d) { return (p4d_val(p4d) == 0); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 4acc9690f599..38ac50279199 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -921,8 +921,29 @@ static inline pud_t pte_pud(pte_t pte) { return __pud_raw(pte_raw(pte)); } + +static inline pte_t *pudp_ptep(pud_t *pud) +{ + return (pte_t *)pud; +} + +#define pud_pfn(pud) pte_pfn(pud_pte(pud)) +#define pud_dirty(pud) pte_dirty(pud_pte(pud)) +#define pud_young(pud) pte_young(pud_pte(pud)) +#define pud_mkold(pud) pte_pud(pte_mkold(pud_pte(pud))) +#define pud_wrprotect(pud) pte_pud(pte_wrprotect(pud_pte(pud))) +#define pud_mkdirty(pud) pte_pud(pte_mkdirty(pud_pte(pud))) +#define pud_mkclean(pud) pte_pud(pte_mkclean(pud_pte(pud))) +#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud))) +#define pud_mkwrite(pud) pte_pud(pte_mkwrite(pud_pte(pud))) #define pud_write(pud) pte_write(pud_pte(pud)) +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +#define pud_soft_dirty(pmd)pte_soft_dirty(pud_pte(pud)) +#define pud_mksoft_dirty(pmd) pte_pud(pte_mksoft_dirty(pud_pte(pud))) +#define pud_clear_soft_dirty(pmd) pte_pud(pte_clear_soft_dirty(pud_pte(pud))) +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + static inline int pud_bad(pud_t pud) { if (radix_enabled()) @@ -1115,15 +1136,24 @@ static inline bool pmd_access_permitted(pmd_t pmd, bool write) #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); +extern pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot); extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); extern void set_pmd_at(struct mm_struct 
*mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); +extern void set_pud_at(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud); + static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { } +static inline void update_mmu_cache_pud(struct vm_area_struct *vma, + unsigned long addr, pud_t *pud) +{ +} + extern int hash__has_transparent_hugepage(void); static inline int has_transparent_hugepage(void) { @@ -1133,6 +1163,14 @@ static inline int has_transparent_hugepage(void) } #define has_transparent_hugepage has_transparent_hugepage +static inline int has_transparent_pud_hugepage(void) +{ + if (radix_enabled()) + return radix__has_transparent_pud_hugepage(); + return 0; +} +#define has_transparent_pud_hugepage has_transparent_pud_hugepage + static inline unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long clr, unsigned long set) @@ -1142,6 +1180,16 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
[PATCH v4 08/13] powerpc/mm/trace: Convert trace event to trace event class
A follow-up patch will add a pud variant for this same event. Using event class makes that addition simpler. No functional change in this patch. Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/mm/book3s64/hash_pgtable.c | 2 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 2 +- include/trace/events/thp.h | 23 --- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 51f48984abca..988948d69bc1 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -214,7 +214,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr old = be64_to_cpu(old_be); - trace_hugepage_update(addr, old, clr, set); + trace_hugepage_update_pmd(addr, old, clr, set); if (old & H_PAGE_HASHPTE) hpte_do_hugepage_flush(mm, addr, pmdp, old); return old; diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index e7ea492ac510..02e185d2e4d6 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -962,7 +962,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long add #endif old = radix__pte_update(mm, addr, pmdp_ptep(pmdp), clr, set, 1); - trace_hugepage_update(addr, old, clr, set); + trace_hugepage_update_pmd(addr, old, clr, set); return old; } diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h index 202b3e3e67ff..a95c78b10561 100644 --- a/include/trace/events/thp.h +++ b/include/trace/events/thp.h @@ -8,25 +8,29 @@ #include #include -TRACE_EVENT(hugepage_set_pmd, +DECLARE_EVENT_CLASS(hugepage_set, - TP_PROTO(unsigned long addr, unsigned long pmd), - TP_ARGS(addr, pmd), + TP_PROTO(unsigned long addr, unsigned long pte), + TP_ARGS(addr, pte), TP_STRUCT__entry( __field(unsigned long, addr) - __field(unsigned long, pmd) + __field(unsigned long, pte) ), TP_fast_assign( __entry->addr = addr; - __entry->pmd = 
pmd; + __entry->pte = pte; ), - TP_printk("Set pmd with 0x%lx with 0x%lx", __entry->addr, __entry->pmd) + TP_printk("Set page table entry with 0x%lx with 0x%lx", __entry->addr, __entry->pte) ); +DEFINE_EVENT(hugepage_set, hugepage_set_pmd, + TP_PROTO(unsigned long addr, unsigned long pmd), + TP_ARGS(addr, pmd) +); -TRACE_EVENT(hugepage_update, +DECLARE_EVENT_CLASS(hugepage_update, TP_PROTO(unsigned long addr, unsigned long pte, unsigned long clr, unsigned long set), TP_ARGS(addr, pte, clr, set), @@ -48,6 +52,11 @@ TRACE_EVENT(hugepage_update, TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set) ); +DEFINE_EVENT(hugepage_update, hugepage_update_pmd, + TP_PROTO(unsigned long addr, unsigned long pmd, unsigned long clr, unsigned long set), + TP_ARGS(addr, pmd, clr, set) +); + DECLARE_EVENT_CLASS(migration_pmd, TP_PROTO(unsigned long addr, unsigned long pmd), -- 2.41.0
[PATCH v4 07/13] mm/vmemmap optimization: Split hugetlb and devdax vmemmap optimization
Arm disabled hugetlb vmemmap optimization [1] because hugetlb vmemmap optimization includes an update of both the permissions (writeable to read-only) and the output address (pfn) of the vmemmap ptes. That is not supported without unmapping of pte(marking it invalid) by some architectures. With DAX vmemmap optimization we don't require such pte updates and architectures can enable DAX vmemmap optimization while having hugetlb vmemmap optimization disabled. Hence split DAX optimization support into a different config. s390, loongarch and riscv don't have devdax support. So the DAX config is not enabled for them. With this change, arm64 should be able to select DAX optimization [1] commit 060a2c92d1b6 ("arm64: mm: hugetlb: Disable HUGETLB_PAGE_OPTIMIZE_VMEMMAP") Signed-off-by: Aneesh Kumar K.V --- arch/loongarch/Kconfig | 2 +- arch/riscv/Kconfig | 2 +- arch/s390/Kconfig | 2 +- arch/x86/Kconfig | 3 ++- fs/Kconfig | 2 +- include/linux/mm.h | 2 +- mm/Kconfig | 5 - 7 files changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e55511af4c77..537ca2a4005a 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -59,7 +59,7 @@ config LOONGARCH select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_LD_ORPHAN_WARN - select ARCH_WANT_OPTIMIZE_VMEMMAP + select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select ARCH_WANTS_NO_INSTR select BUILDTIME_TABLE_SORT select COMMON_CLK diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 4c07b9189c86..6943d34c1ec1 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -53,7 +53,7 @@ config RISCV select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select ARCH_WANT_LD_ORPHAN_WARN if !XIP_KERNEL - select ARCH_WANT_OPTIMIZE_VMEMMAP + select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU select 
BUILDTIME_TABLE_SORT if MMU diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5b39918b7042..975fd06e4f4d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -127,7 +127,7 @@ config S390 select ARCH_WANTS_NO_INSTR select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_IPC_PARSE_VERSION - select ARCH_WANT_OPTIMIZE_VMEMMAP + select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 select DMA_OPS if PCI diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7422db409770..78224aa76409 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -128,7 +128,8 @@ config X86 select ARCH_WANT_GENERAL_HUGETLB select ARCH_WANT_HUGE_PMD_SHARE select ARCH_WANT_LD_ORPHAN_WARN - select ARCH_WANT_OPTIMIZE_VMEMMAP if X86_64 + select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64 + select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64 select ARCH_WANTS_THP_SWAP if X86_64 select ARCH_HAS_PARANOID_L1D_FLUSH select BUILDTIME_TABLE_SORT diff --git a/fs/Kconfig b/fs/Kconfig index 18d034ec7953..9c104c130a6e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -252,7 +252,7 @@ config HUGETLB_PAGE config HUGETLB_PAGE_OPTIMIZE_VMEMMAP def_bool HUGETLB_PAGE - depends on ARCH_WANT_OPTIMIZE_VMEMMAP + depends on ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP depends on SPARSEMEM_VMEMMAP config HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON diff --git a/include/linux/mm.h b/include/linux/mm.h index 1a2234ee14d2..83f51ec0897d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3640,7 +3640,7 @@ void vmemmap_free(unsigned long start, unsigned long end, #endif #define VMEMMAP_RESERVE_NR 2 -#ifdef CONFIG_ARCH_WANT_OPTIMIZE_VMEMMAP +#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) { diff --git a/mm/Kconfig b/mm/Kconfig index 09130434e30d..923bd35f81f2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -487,7 +487,10 @@ config SPARSEMEM_VMEMMAP # Select this config option from the architecture Kconfig, 
if it is preferred # to enable the feature of HugeTLB/dev_dax vmemmap optimization. # -config ARCH_WANT_OPTIMIZE_VMEMMAP +config ARCH_WANT_OPTIMIZE_DAX_VMEMMAP + bool + +config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP bool config HAVE_MEMBLOCK_PHYS_MAP -- 2.41.0
[PATCH v4 06/13] mm/huge pud: Use transparent huge pud helpers only with CONFIG_TRANSPARENT_HUGEPAGE
The pudp_set_wrprotect and move_huge_pud helpers are only used when CONFIG_TRANSPARENT_HUGEPAGE is enabled. Similar to the pmdp_set_wrprotect and move_huge_pmd helpers, use the architecture override only if CONFIG_TRANSPARENT_HUGEPAGE is set. Signed-off-by: Aneesh Kumar K.V --- include/linux/pgtable.h | 2 ++ mm/mremap.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 91def34f7784..b5af3e014606 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -558,6 +558,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, #endif #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +#ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long address, pud_t *pudp) { @@ -571,6 +572,7 @@ static inline void pudp_set_wrprotect(struct mm_struct *mm, { BUILD_BUG(); } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ #endif diff --git a/mm/mremap.c b/mm/mremap.c index 11e06e4ab33b..056478c106ee 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -349,7 +349,7 @@ static inline bool move_normal_pud(struct vm_area_struct *vma, } #endif -#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pud_t *old_pud, pud_t *new_pud) { -- 2.41.0
[PATCH v4 05/13] mm: Add __HAVE_ARCH_PUD_SAME similar to __HAVE_ARCH_P4D_SAME
This helps architectures to override pmd_same and pud_same independently. Signed-off-by: Aneesh Kumar K.V --- include/linux/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 6fd9b2831338..91def34f7784 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -693,7 +693,9 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) { return pmd_val(pmd_a) == pmd_val(pmd_b); } +#endif +#ifndef __HAVE_ARCH_PUD_SAME static inline int pud_same(pud_t pud_a, pud_t pud_b) { return pud_val(pud_a) == pud_val(pud_b); -- 2.41.0
[PATCH v4 04/13] mm/vmemmap: Allow architectures to override how vmemmap optimization works
Architectures like powerpc would like to use different page table allocators and mapping mechanisms to implement vmemmap optimization. Similar to vmemmap_populate, allow architectures to implement vmemmap_populate_compound_pages. Signed-off-by: Aneesh Kumar K.V --- mm/sparse-vmemmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index a044a130405b..541b3f69a481 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -141,6 +141,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node, start, end - 1); } +#ifndef vmemmap_populate_compound_pages pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, struct vmem_altmap *altmap, struct page *reuse) @@ -446,6 +447,8 @@ static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn, return 0; } +#endif + struct page * __meminit __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap) -- 2.41.0
[PATCH v4 03/13] mm/vmemmap: Improve vmemmap_can_optimize and allow architectures to override
dax vmemmap optimization requires a minimum of 2 PAGE_SIZE area within vmemmap such that tail page mapping can point to the second PAGE_SIZE area. Enforce that in vmemmap_can_optimize() function. Architectures like powerpc also want to enable vmemmap optimization conditionally (only with radix MMU translation). Hence allow architecture override. Signed-off-by: Aneesh Kumar K.V --- include/linux/mm.h | 27 +++ mm/mm_init.c | 2 +- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 2dd73e4f3d8e..1a2234ee14d2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3639,13 +3639,32 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap); #endif +#define VMEMMAP_RESERVE_NR 2 #ifdef CONFIG_ARCH_WANT_OPTIMIZE_VMEMMAP -static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, - struct dev_pagemap *pgmap) +static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap, + struct dev_pagemap *pgmap) { - return is_power_of_2(sizeof(struct page)) && - pgmap && (pgmap_vmemmap_nr(pgmap) > 1) && !altmap; + unsigned long nr_pages; + unsigned long nr_vmemmap_pages; + + if (!pgmap || !is_power_of_2(sizeof(struct page))) + return false; + + nr_pages = pgmap_vmemmap_nr(pgmap); + nr_vmemmap_pages = ((nr_pages * sizeof(struct page)) >> PAGE_SHIFT); + /* +* For vmemmap optimization with DAX we need minimum 2 vmemmap +* pages. 
See layout diagram in Documentation/mm/vmemmap_dedup.rst +*/ + return !altmap && (nr_vmemmap_pages > VMEMMAP_RESERVE_NR); } +/* + * If we don't have an architecture override, use the generic rule + */ +#ifndef vmemmap_can_optimize +#define vmemmap_can_optimize __vmemmap_can_optimize +#endif + #else static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) diff --git a/mm/mm_init.c b/mm/mm_init.c index a1963c3322af..245ac69b66a5 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1020,7 +1020,7 @@ static inline unsigned long compound_nr_pages(struct vmem_altmap *altmap, if (!vmemmap_can_optimize(altmap, pgmap)) return pgmap_vmemmap_nr(pgmap); - return 2 * (PAGE_SIZE / sizeof(struct page)); + return VMEMMAP_RESERVE_NR * (PAGE_SIZE / sizeof(struct page)); } static void __ref memmap_init_compound(struct page *head, -- 2.41.0
[PATCH v4 02/13] mm: Change pudp_huge_get_and_clear_full take vm_area_struct as arg
We will use this in a later patch to do tlb flush when clearing pud entries on powerpc. This is similar to commit 93a98695f2f9 ("mm: change pmdp_huge_get_and_clear_full take vm_area_struct as arg") Signed-off-by: Aneesh Kumar K.V --- include/linux/pgtable.h | 4 ++-- mm/debug_vm_pgtable.c | 2 +- mm/huge_memory.c| 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index cf13f8d938a8..6fd9b2831338 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -450,11 +450,11 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL -static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm, +static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, int full) { - return pudp_huge_get_and_clear(mm, address, pudp); + return pudp_huge_get_and_clear(vma->vm_mm, address, pudp); } #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index ee119e33fef1..ee2c4c1dcfc8 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -385,7 +385,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) WARN_ON(!(pud_write(pud) && pud_dirty(pud))); #ifndef __PAGETABLE_PMD_FOLDED - pudp_huge_get_and_clear_full(args->mm, vaddr, args->pudp, 1); + pudp_huge_get_and_clear_full(args->vma, vaddr, args->pudp, 1); pud = READ_ONCE(*args->pudp); WARN_ON(!pud_none(pud)); #endif /* __PAGETABLE_PMD_FOLDED */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index eb3678360b97..ba20cef681a4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1981,7 +1981,7 @@ int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, if (!ptl) return 0; - pudp_huge_get_and_clear_full(tlb->mm, addr, pud, tlb->fullmm); + pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm); tlb_remove_pud_tlb_entry(tlb, pud, 
addr); if (vma_is_special_huge(vma)) { spin_unlock(ptl); -- 2.41.0
[PATCH v4 01/13] mm/hugepage pud: Allow arch-specific helper function to check huge page pud support
Architectures like powerpc would like to enable transparent huge page pud support only with radix translation. To support that add has_transparent_pud_hugepage() helper that architectures can override. Signed-off-by: Aneesh Kumar K.V --- drivers/nvdimm/pfn_devs.c | 2 +- include/linux/pgtable.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index af7d9301520c..18ad315581ca 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -100,7 +100,7 @@ static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments) if (has_transparent_hugepage()) { alignments[1] = HPAGE_PMD_SIZE; - if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) + if (has_transparent_pud_hugepage()) alignments[2] = HPAGE_PUD_SIZE; } diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5063b482e34f..cf13f8d938a8 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1499,6 +1499,9 @@ typedef unsigned int pgtbl_mod_mask; #define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE) #endif +#ifndef has_transparent_pud_hugepage +#define has_transparent_pud_hugepage() IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) +#endif /* * On some architectures it depends on the mm if the p4d/pud or pmd * layer of the page table hierarchy is folded or not. -- 2.41.0
[PATCH v4 00/13] Add support for DAX vmemmap optimization for ppc64
This patch series implements changes required to support DAX vmemmap optimization for ppc64. The vmemmap optimization is only enabled with radix MMU translation and 1GB PUD mapping with 64K page size. The patch series also splits hugetlb vmemmap optimization as a separate Kconfig variable so that architectures can enable DAX vmemmap optimization without enabling hugetlb vmemmap optimization. This should enable architectures like arm64 to enable DAX vmemmap optimization while they can't enable hugetlb vmemmap optimization. More details of the same are in patch "mm/vmemmap optimization: Split hugetlb and devdax vmemmap optimization" Changes from v3: * Rebase to latest linus tree * Build fix with SPARSEMEM_VMEMMAP disabled * Add hash_pud_same outside THP Kconfig Changes from v2: * Rebase to latest linus tree * Address review feedback Changes from V1: * Fix make htmldocs warning * Fix vmemmap allocation bugs with different alignment values. * Correctly check for section validity before we free vmemmap area Aneesh Kumar K.V (13): mm/hugepage pud: Allow arch-specific helper function to check huge page pud support mm: Change pudp_huge_get_and_clear_full take vm_area_struct as arg mm/vmemmap: Improve vmemmap_can_optimize and allow architectures to override mm/vmemmap: Allow architectures to override how vmemmap optimization works mm: Add __HAVE_ARCH_PUD_SAME similar to __HAVE_ARCH_P4D_SAME mm/huge pud: Use transparent huge pud helpers only with CONFIG_TRANSPARENT_HUGEPAGE mm/vmemmap optimization: Split hugetlb and devdax vmemmap optimization powerpc/mm/trace: Convert trace event to trace event class powerpc/book3s64/mm: Enable transparent pud hugepage powerpc/book3s64/vmemmap: Switch radix to use a different vmemmap handling function powerpc/book3s64/radix: Add support for vmemmap optimization for radix powerpc/book3s64/radix: Remove mmu_vmemmap_psize powerpc/book3s64/radix: Add debug message to give more details of vmemmap allocation Documentation/mm/vmemmap_dedup.rst| 1 
+ Documentation/powerpc/index.rst | 1 + Documentation/powerpc/vmemmap_dedup.rst | 101 +++ arch/loongarch/Kconfig| 2 +- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/book3s/64/hash.h | 9 + arch/powerpc/include/asm/book3s/64/pgtable.h | 155 - arch/powerpc/include/asm/book3s/64/radix.h| 47 ++ .../include/asm/book3s/64/tlbflush-radix.h| 2 + arch/powerpc/include/asm/book3s/64/tlbflush.h | 8 + arch/powerpc/include/asm/pgtable.h| 4 + arch/powerpc/mm/book3s64/hash_pgtable.c | 2 +- arch/powerpc/mm/book3s64/pgtable.c| 78 +++ arch/powerpc/mm/book3s64/radix_pgtable.c | 573 -- arch/powerpc/mm/book3s64/radix_tlb.c | 7 + arch/powerpc/mm/init_64.c | 37 +- arch/powerpc/platforms/Kconfig.cputype| 1 + arch/riscv/Kconfig| 2 +- arch/s390/Kconfig | 2 +- arch/x86/Kconfig | 3 +- drivers/nvdimm/pfn_devs.c | 2 +- fs/Kconfig| 2 +- include/linux/mm.h| 29 +- include/linux/pgtable.h | 11 +- include/trace/events/thp.h| 33 +- mm/Kconfig| 5 +- mm/debug_vm_pgtable.c | 2 +- mm/huge_memory.c | 2 +- mm/mm_init.c | 2 +- mm/mremap.c | 2 +- mm/sparse-vmemmap.c | 3 + 31 files changed, 1047 insertions(+), 82 deletions(-) create mode 100644 Documentation/powerpc/vmemmap_dedup.rst -- 2.41.0
Re: [PATCH 09/17] auxdisplay: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
Hi Am 10.07.23 um 16:24 schrieb Miguel Ojeda: On Mon, Jul 10, 2023 at 3:01 PM Thomas Zimmermann wrote: The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do not set it. `framebuffer_alloc()` does indeed use `kzalloc()`, but the docs do not mention the zeroing. Should that guarantee be documented? I'll append a patch to the series that documents this. Flags should signal differences from the default values. After cleaning up all occurences of FBINFO_FLAG_DEFAULT, the token can be removed. occurences -> occurrences can -> will maybe? Since the intention of the patch series is to remove it (them) altogether). Sure. Best regards Thomas Thanks! Cheers, Miguel -- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Frankenstrasse 146, 90461 Nuernberg, Germany GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman HRB 36809 (AG Nuernberg) OpenPGP_signature Description: OpenPGP digital signature
Re: [PATCH 09/17] auxdisplay: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
On Mon, Jul 10, 2023 at 3:01 PM Thomas Zimmermann wrote: > > The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct > fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do > not set it. `framebuffer_alloc()` does indeed use `kzalloc()`, but the docs do not mention the zeroing. Should that guarantee be documented? > Flags should signal differences from the default values. After cleaning > up all occurences of FBINFO_FLAG_DEFAULT, the token can be removed. occurences -> occurrences can -> will maybe? Since the intention of the patch series is to remove it (them) altogether). Thanks! Cheers, Miguel
Re: [PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag
Hi! On Mon, 2023-07-10 at 16:04 +0200, Thomas Zimmermann wrote: > > > I won't argue with that, but the flag itself is wrong. > > > FBINFO_FLAG_DEFAULT is/was for struct fb_info.flags. You have struct > > > fb_videomode.flag. The valid flags for this field are at [1]. If > > > anything, the field could be initialized to FB_MODE_IS_UNKNOWN, which > > > has the same value. > > > > > > [1] https://elixir.bootlin.com/linux/latest/source/include/linux/fb.h#L681 > > > > FB_MODE_IS_UNKNOWN sounds very reasonable to me. Would you agree using that > > instead? > > Sure, I'll update the patch accordingly. Thanks! I'll ack the updated patch. Adrian -- .''`. John Paul Adrian Glaubitz : :' : Debian Developer `. `' Physicist `-GPG: 62FF 8A75 84E0 2956 9546 0006 7426 3B37 F5B5 F913
Re: [PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag
Hi Am 10.07.23 um 15:59 schrieb John Paul Adrian Glaubitz: Hi Thomas! On Mon, 2023-07-10 at 15:52 +0200, Thomas Zimmermann wrote: I would argue that the current code is more readable that your proposed change. I agree that it's a no-op, but code is not just about functionality but also readability, isn't it? I won't argue with that, but the flag itself is wrong. FBINFO_FLAG_DEFAULT is/was for struct fb_info.flags. You have struct fb_videomode.flag. The valid flags for this field are at [1]. If anything, the field could be initialized to FB_MODE_IS_UNKNOWN, which has the same value. [1] https://elixir.bootlin.com/linux/latest/source/include/linux/fb.h#L681 FB_MODE_IS_UNKNOWN sounds very reasonable to me. Would you agree using that instead? Sure, I'll update the patch accordingly. Best regards Thomas Also, I prefer "sh:" as the architecture prefix, not "arch/sh:". Ok. Thanks. Adrian -- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Frankenstrasse 146, 90461 Nuernberg, Germany GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman HRB 36809 (AG Nuernberg) OpenPGP_signature Description: OpenPGP digital signature
Re: [PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag
Hi Thomas! On Mon, 2023-07-10 at 15:52 +0200, Thomas Zimmermann wrote: > > I would argue that the current code is more readable that your proposed > > change. > > > > I agree that it's a no-op, but code is not just about functionality but also > > readability, isn't it? > > I won't argue with that, but the flag itself is wrong. > FBINFO_FLAG_DEFAULT is/was for struct fb_info.flags. You have struct > fb_videomode.flag. The valid flags for this field are at [1]. If > anything, the field could be initialized to FB_MODE_IS_UNKNOWN, which > has the same value. > > [1] https://elixir.bootlin.com/linux/latest/source/include/linux/fb.h#L681 FB_MODE_IS_UNKNOWN sounds very reasonable to me. Would you agree using that instead? > > > > Also, I prefer "sh:" as the architecture prefix, not "arch/sh:". > > Ok. Thanks. Adrian -- .''`. John Paul Adrian Glaubitz : :' : Debian Developer `. `' Physicist `-GPG: 62FF 8A75 84E0 2956 9546 0006 7426 3B37 F5B5 F913
Re: [PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag
Hi Am 10.07.23 um 15:42 schrieb John Paul Adrian Glaubitz: Hi Thomas! On Mon, 2023-07-10 at 14:50 +0200, Thomas Zimmermann wrote: FBINFO_FLAG_DEFAULT is a flag for a framebuffer in struct fb_info. Flags for videomodes are prefixed with FB_MODE_. FBINFO_FLAG_DEFAULT is 0 and the static declaration already clears the memory area of sh7763fb_videomode. So remove the assignment. Signed-off-by: Thomas Zimmermann Cc: Yoshinori Sato Cc: Rich Felker Cc: John Paul Adrian Glaubitz --- arch/sh/boards/mach-sh7763rdp/setup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/boards/mach-sh7763rdp/setup.c b/arch/sh/boards/mach-sh7763rdp/setup.c index 97e715e4e9b3..345f2b76c85a 100644 --- a/arch/sh/boards/mach-sh7763rdp/setup.c +++ b/arch/sh/boards/mach-sh7763rdp/setup.c @@ -119,7 +119,6 @@ static struct fb_videomode sh7763fb_videomode = { .vsync_len = 1, .sync = 0, .vmode = FB_VMODE_NONINTERLACED, - .flag = FBINFO_FLAG_DEFAULT, }; static struct sh7760fb_platdata sh7763fb_def_pdata = { I would argue that the current code is more readable that your proposed change. I agree that it's a no-op, but code is not just about functionality but also readability, isn't it? I won't argue with that, but the flag itself is wrong. FBINFO_FLAG_DEFAULT is/was for struct fb_info.flags. You have struct fb_videomode.flag. The valid flags for this field are at [1]. If anything, the field could be initialized to FB_MODE_IS_UNKNOWN, which has the same value. [1] https://elixir.bootlin.com/linux/latest/source/include/linux/fb.h#L681 Also, I prefer "sh:" as the architecture prefix, not "arch/sh:". Ok. Best regards Thomas Thanks, Adrian -- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Frankenstrasse 146, 90461 Nuernberg, Germany GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman HRB 36809 (AG Nuernberg) OpenPGP_signature Description: OpenPGP digital signature
Re: [PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag
Hi Thomas! On Mon, 2023-07-10 at 14:50 +0200, Thomas Zimmermann wrote: > FBINFO_FLAG_DEFAULT is a flag for a framebuffer in struct fb_info. > Flags for videomodes are prefixed with FB_MODE_. FBINFO_FLAG_DEFAULT > is 0 and the static declaration already clears the memory area of > sh7763fb_videomode. So remove the assignment. > > Signed-off-by: Thomas Zimmermann > Cc: Yoshinori Sato > Cc: Rich Felker > Cc: John Paul Adrian Glaubitz > --- > arch/sh/boards/mach-sh7763rdp/setup.c | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/arch/sh/boards/mach-sh7763rdp/setup.c > b/arch/sh/boards/mach-sh7763rdp/setup.c > index 97e715e4e9b3..345f2b76c85a 100644 > --- a/arch/sh/boards/mach-sh7763rdp/setup.c > +++ b/arch/sh/boards/mach-sh7763rdp/setup.c > @@ -119,7 +119,6 @@ static struct fb_videomode sh7763fb_videomode = { > .vsync_len = 1, > .sync = 0, > .vmode = FB_VMODE_NONINTERLACED, > - .flag = FBINFO_FLAG_DEFAULT, > }; > > static struct sh7760fb_platdata sh7763fb_def_pdata = { I would argue that the current code is more readable than your proposed change. I agree that it's a no-op, but code is not just about functionality but also readability, isn't it? Also, I prefer "sh:" as the architecture prefix, not "arch/sh:". Thanks, Adrian -- .''`. John Paul Adrian Glaubitz : :' : Debian Developer `. `' Physicist `-GPG: 62FF 8A75 84E0 2956 9546 0006 7426 3B37 F5B5 F913
Re: [PATCH 10/17] hid/picolcd: Remove flag FBINFO_FLAG_DEFAULT from fbdev driver
On Mon, Jul 10, 2023 at 3:01 PM Thomas Zimmermann wrote: > > The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct > fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do > not set it. > > Flags should signal differences from the default values. After cleaning > up all occurences of FBINFO_FLAG_DEFAULT, the token can be removed. > > Signed-off-by: Thomas Zimmermann > Cc: "Bruno Prémont" > Cc: Jiri Kosina > Cc: Benjamin Tissoires Acked-by: Benjamin Tissoires Feel free to take this through the DRI tree (or any other that handles FB) with the rest of the series if you want. Cheers, Benjamin > --- > drivers/hid/hid-picolcd_fb.c | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/drivers/hid/hid-picolcd_fb.c b/drivers/hid/hid-picolcd_fb.c > index dabcd054dad9..d726aaafb146 100644 > --- a/drivers/hid/hid-picolcd_fb.c > +++ b/drivers/hid/hid-picolcd_fb.c > @@ -527,7 +527,6 @@ int picolcd_init_framebuffer(struct picolcd_data *data) > info->var = picolcdfb_var; > info->fix = picolcdfb_fix; > info->fix.smem_len = PICOLCDFB_SIZE*8; > - info->flags = FBINFO_FLAG_DEFAULT; > > fbdata = info->par; > spin_lock_init(>lock); > -- > 2.41.0 >
[PATCH 05/17] fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurences of FBINFO_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Helge Deller Cc: Nicolas Ferre Cc: Benjamin Herrenschmidt Cc: Ferenc Bakonyi Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Wei Liu Cc: Dexuan Cui Cc: Antonino Daplas Cc: Maik Broemme Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Kristoffer Ericson Cc: Hans de Goede Cc: Steve Glendinning Cc: Bernie Thompson Cc: Florian Tobias Schandinat --- drivers/video/fbdev/amifb.c | 5 ++--- drivers/video/fbdev/asiliantfb.c | 1 - drivers/video/fbdev/atmel_lcdfb.c| 2 +- drivers/video/fbdev/aty/atyfb_base.c | 3 +-- drivers/video/fbdev/aty/radeon_base.c| 3 +-- drivers/video/fbdev/bw2.c| 1 - drivers/video/fbdev/carminefb.c | 1 - drivers/video/fbdev/cg14.c | 2 +- drivers/video/fbdev/cg3.c| 1 - drivers/video/fbdev/cg6.c| 2 +- drivers/video/fbdev/chipsfb.c| 1 - drivers/video/fbdev/cirrusfb.c | 3 +-- drivers/video/fbdev/clps711x-fb.c| 1 - drivers/video/fbdev/cobalt_lcdfb.c | 1 - drivers/video/fbdev/ep93xx-fb.c | 1 - drivers/video/fbdev/ffb.c| 3 +-- drivers/video/fbdev/fm2fb.c | 1 - drivers/video/fbdev/gbefb.c | 1 - drivers/video/fbdev/geode/gx1fb_core.c | 1 - drivers/video/fbdev/geode/gxfb_core.c| 1 - drivers/video/fbdev/geode/lxfb_core.c| 1 - drivers/video/fbdev/grvga.c | 2 +- drivers/video/fbdev/hgafb.c | 2 +- drivers/video/fbdev/hitfb.c | 2 +- drivers/video/fbdev/hyperv_fb.c | 2 -- drivers/video/fbdev/i740fb.c | 2 +- drivers/video/fbdev/i810/i810_main.c | 4 ++-- drivers/video/fbdev/imsttfb.c| 3 +-- drivers/video/fbdev/intelfb/intelfbdrv.c | 4 ++-- drivers/video/fbdev/kyro/fbdev.c | 1 - drivers/video/fbdev/leo.c| 1 - drivers/video/fbdev/mb862xx/mb862xxfbdrv.c | 2 +- drivers/video/fbdev/mmp/fb/mmpfb.c | 2 +- drivers/video/fbdev/neofb.c | 2 +- 
drivers/video/fbdev/nvidia/nvidia.c | 4 ++-- drivers/video/fbdev/offb.c | 2 +- drivers/video/fbdev/p9100.c | 1 - drivers/video/fbdev/platinumfb.c | 1 - drivers/video/fbdev/pm2fb.c | 3 +-- drivers/video/fbdev/pm3fb.c | 3 +-- drivers/video/fbdev/pmag-aa-fb.c | 1 - drivers/video/fbdev/pmag-ba-fb.c | 1 - drivers/video/fbdev/pmagb-b-fb.c | 1 - drivers/video/fbdev/ps3fb.c | 2 +- drivers/video/fbdev/pvr2fb.c | 2 +- drivers/video/fbdev/pxa168fb.c | 2 +- drivers/video/fbdev/q40fb.c | 1 - drivers/video/fbdev/riva/fbdev.c | 3 +-- drivers/video/fbdev/s1d13xxxfb.c | 4 ++-- drivers/video/fbdev/savage/savagefb_driver.c | 3 +-- drivers/video/fbdev/simplefb.c | 1 - drivers/video/fbdev/sis/sis_main.c | 3 +-- drivers/video/fbdev/skeletonfb.c | 2 +- drivers/video/fbdev/smscufx.c| 2 +- drivers/video/fbdev/sstfb.c | 1 - drivers/video/fbdev/sunxvr1000.c | 1 - drivers/video/fbdev/sunxvr2500.c | 1 - drivers/video/fbdev/sunxvr500.c | 1 - drivers/video/fbdev/tcx.c| 1 - drivers/video/fbdev/tdfxfb.c | 2 +- drivers/video/fbdev/tgafb.c | 2 +- drivers/video/fbdev/tridentfb.c | 2 +- drivers/video/fbdev/udlfb.c | 2 +- drivers/video/fbdev/via/viafbdev.c | 2 +- 64 files changed, 41 insertions(+), 81 deletions(-) diff --git a/drivers/video/fbdev/amifb.c b/drivers/video/fbdev/amifb.c index d88265dbebf4..cea782283b9c 100644 --- a/drivers/video/fbdev/amifb.c +++ b/drivers/video/fbdev/amifb.c @@ -2427,7 +2427,7 @@ static int amifb_set_par(struct fb_info *info) info->fix.ywrapstep = 1; info->fix.xpanstep = 0; info->fix.ypanstep = 0; - info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YWRAP | + info->flags = FBINFO_HWACCEL_YWRAP | FBINFO_READS_FAST; /* override SCROLL_REDRAW */ } else { info->fix.ywrapstep = 0; @@ -2436,7 +2436,7 @@ static int amifb_set_par(struct fb_info *info) else info->fix.xpanstep = 16 << maxfmode; info->fix.ypanstep = 1; - info->flags = FBINFO_DEFAULT |
[PATCH 17/17] fbdev: Remove FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT
Remove the unused flags FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT. No functional changes. Signed-off-by: Thomas Zimmermann Cc: Helge Deller --- include/linux/fb.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/fb.h b/include/linux/fb.h index 1d5c13f34b09..43458f582f35 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -383,7 +383,6 @@ struct fb_tile_ops { #endif /* CONFIG_FB_TILEBLITTING */ /* FBINFO_* = fb_info.flags bit flags */ -#define FBINFO_DEFAULT 0 #define FBINFO_HWACCEL_DISABLED0x0002 /* When FBINFO_HWACCEL_DISABLED is set: * Hardware acceleration is turned off. Software implementations @@ -504,8 +503,6 @@ struct fb_info { bool skip_vt_switch; /* no VT switch on suspend/resume required */ }; -#define FBINFO_FLAG_DEFAULTFBINFO_DEFAULT - /* This will go away * fbset currently hacks in FB_ACCELF_TEXT into var.accel_flags * when it wants to turn the acceleration engine on. This is -- 2.41.0
[PATCH 09/17] auxdisplay: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct fb_info.flags has been allocated to zero by framebuffer_alloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Miguel Ojeda Cc: Robin van der Gracht --- drivers/auxdisplay/cfag12864bfb.c | 1 - drivers/auxdisplay/ht16k33.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/auxdisplay/cfag12864bfb.c b/drivers/auxdisplay/cfag12864bfb.c index c2cab7e2b126..729845bcc803 100644 --- a/drivers/auxdisplay/cfag12864bfb.c +++ b/drivers/auxdisplay/cfag12864bfb.c @@ -79,7 +79,6 @@ static int cfag12864bfb_probe(struct platform_device *device) info->var = cfag12864bfb_var; info->pseudo_palette = NULL; info->par = NULL; - info->flags = FBINFO_FLAG_DEFAULT; if (register_framebuffer(info) < 0) goto fballoced; diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index edaf92b7ea77..df3f37651e45 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -646,7 +646,6 @@ static int ht16k33_fbdev_probe(struct device *dev, struct ht16k33_priv *priv, fbdev->info->var = ht16k33_fb_var; fbdev->info->bl_dev = bl; fbdev->info->pseudo_palette = NULL; - fbdev->info->flags = FBINFO_FLAG_DEFAULT; fbdev->info->par = priv; err = register_framebuffer(fbdev->info); -- 2.41.0
[PATCH 13/17] fbdev: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct fb_info.flags has been allocated to zero by kzalloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Helge Deller --- drivers/video/fbdev/amba-clcd.c | 1 - drivers/video/fbdev/matrox/matroxfb_crtc2.c | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c index e45338227be6..24d89e6fb780 100644 --- a/drivers/video/fbdev/amba-clcd.c +++ b/drivers/video/fbdev/amba-clcd.c @@ -461,7 +461,6 @@ static int clcdfb_register(struct clcd_fb *fb) } fb->fb.fbops= _ops; - fb->fb.flags= FBINFO_FLAG_DEFAULT; fb->fb.pseudo_palette = fb->cmap; strncpy(fb->fb.fix.id, clcd_name, sizeof(fb->fb.fix.id)); diff --git a/drivers/video/fbdev/matrox/matroxfb_crtc2.c b/drivers/video/fbdev/matrox/matroxfb_crtc2.c index 7655afa3fd50..372197c124de 100644 --- a/drivers/video/fbdev/matrox/matroxfb_crtc2.c +++ b/drivers/video/fbdev/matrox/matroxfb_crtc2.c @@ -603,9 +603,8 @@ static int matroxfb_dh_regit(const struct matrox_fb_info *minfo, void* oldcrtc2; m2info->fbcon.fbops = _dh_ops; - m2info->fbcon.flags = FBINFO_FLAG_DEFAULT; - m2info->fbcon.flags |= FBINFO_HWACCEL_XPAN | - FBINFO_HWACCEL_YPAN; + m2info->fbcon.flags = FBINFO_HWACCEL_XPAN | + FBINFO_HWACCEL_YPAN; m2info->fbcon.pseudo_palette = m2info->cmap; fb_alloc_cmap(>fbcon.cmap, 256, 1); -- 2.41.0
[PATCH 16/17] fbdev/pxafb: Remove flag FBINFO_FLAG_DEFAULT
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by devm_kzalloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Helge Deller --- drivers/video/fbdev/pxafb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index c8c4677d06b4..beffb0602a2c 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -888,7 +888,6 @@ static void init_pxafb_overlay(struct pxafb_info *fbi, struct pxafb_layer *ofb, ofb->fb.var.vmode = FB_VMODE_NONINTERLACED; ofb->fb.fbops = _fb_ops; - ofb->fb.flags = FBINFO_FLAG_DEFAULT; ofb->fb.node= -1; ofb->fb.pseudo_palette = NULL; -- 2.41.0
[PATCH 14/17] fbdev: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurences of FBINFO_FLAG_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Jaya Kumar Cc: Helge Deller Cc: Peter Jones Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Shawn Guo Cc: Fabio Estevam Cc: NXP Linux Team Cc: Maik Broemme Cc: Jingoo Han Cc: Sudip Mukherjee Cc: Teddy Wang Cc: Michal Januszewski --- drivers/video/fbdev/arcfb.c| 1 - drivers/video/fbdev/aty/aty128fb.c | 1 - drivers/video/fbdev/broadsheetfb.c | 2 +- drivers/video/fbdev/da8xx-fb.c | 1 - drivers/video/fbdev/efifb.c| 1 - drivers/video/fbdev/goldfishfb.c | 1 - drivers/video/fbdev/gxt4500.c | 3 +-- drivers/video/fbdev/hecubafb.c | 2 +- drivers/video/fbdev/imxfb.c| 3 +-- drivers/video/fbdev/intelfb/intelfbdrv.c | 1 - drivers/video/fbdev/metronomefb.c | 2 +- drivers/video/fbdev/mx3fb.c| 1 - drivers/video/fbdev/omap/omapfb_main.c | 1 - drivers/video/fbdev/omap2/omapfb/omapfb-main.c | 1 - drivers/video/fbdev/s3c-fb.c | 1 - drivers/video/fbdev/sh_mobile_lcdcfb.c | 2 -- drivers/video/fbdev/sis/sis_main.c | 2 -- drivers/video/fbdev/sm501fb.c | 2 +- drivers/video/fbdev/sm712fb.c | 1 - drivers/video/fbdev/uvesafb.c | 3 +-- drivers/video/fbdev/vesafb.c | 2 +- drivers/video/fbdev/vfb.c | 1 - drivers/video/fbdev/vga16fb.c | 2 +- drivers/video/fbdev/xen-fbfront.c | 2 +- 24 files changed, 10 insertions(+), 29 deletions(-) diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c index 9aaea3be8281..cff11cb04a55 100644 --- a/drivers/video/fbdev/arcfb.c +++ b/drivers/video/fbdev/arcfb.c @@ -546,7 +546,6 @@ static int arcfb_probe(struct platform_device *dev) par->c2io_addr = c2io_addr; par->cslut[0] = 0x00; par->cslut[1] = 0x06; - info->flags = FBINFO_FLAG_DEFAULT; spin_lock_init(>lock); if (irq) { par->irq = irq; diff --git 
a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c index 2d9320a52e51..b44fc78ccd4f 100644 --- a/drivers/video/fbdev/aty/aty128fb.c +++ b/drivers/video/fbdev/aty/aty128fb.c @@ -1927,7 +1927,6 @@ static int aty128_init(struct pci_dev *pdev, const struct pci_device_id *ent) /* fill in info */ info->fbops = _ops; - info->flags = FBINFO_FLAG_DEFAULT; par->lcd_on = default_lcd_on; par->crt_on = default_crt_on; diff --git a/drivers/video/fbdev/broadsheetfb.c b/drivers/video/fbdev/broadsheetfb.c index cb725a91b6bb..e51e14c29c55 100644 --- a/drivers/video/fbdev/broadsheetfb.c +++ b/drivers/video/fbdev/broadsheetfb.c @@ -1069,7 +1069,7 @@ static int broadsheetfb_probe(struct platform_device *dev) mutex_init(>io_lock); - info->flags = FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB; + info->flags = FBINFO_VIRTFB; info->fbdefio = _defio; fb_deferred_io_init(info); diff --git a/drivers/video/fbdev/da8xx-fb.c b/drivers/video/fbdev/da8xx-fb.c index 60cd1286370f..988dedcf6be8 100644 --- a/drivers/video/fbdev/da8xx-fb.c +++ b/drivers/video/fbdev/da8xx-fb.c @@ -1463,7 +1463,6 @@ static int fb_probe(struct platform_device *device) da8xx_fb_var.bits_per_pixel = lcd_cfg->bpp; /* Initialize fbinfo */ - da8xx_fb_info->flags = FBINFO_FLAG_DEFAULT; da8xx_fb_info->fix = da8xx_fb_fix; da8xx_fb_info->var = da8xx_fb_var; da8xx_fb_info->fbops = _fb_ops; diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index 3d7be69ab593..3391c8e84210 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -555,7 +555,6 @@ static int efifb_probe(struct platform_device *dev) info->fbops = _ops; info->var = efifb_defined; info->fix = efifb_fix; - info->flags = FBINFO_FLAG_DEFAULT; orientation = drm_get_panel_orientation_quirk(efifb_defined.xres, efifb_defined.yres); diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index 6fa2108fd912..ef2528c3faa9 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ 
b/drivers/video/fbdev/goldfishfb.c @@ -212,7 +212,6 @@ static int goldfish_fb_probe(struct platform_device *pdev) height = readl(fb->reg_base + FB_GET_HEIGHT); fb->fb.fbops= _fb_ops; - fb->fb.flags= FBINFO_FLAG_DEFAULT; fb->fb.pseudo_palette = fb->cmap; fb->fb.fix.type = FB_TYPE_PACKED_PIXELS;
[PATCH 10/17] hid/picolcd: Remove flag FBINFO_FLAG_DEFAULT from fbdev driver
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: "Bruno Prémont" Cc: Jiri Kosina Cc: Benjamin Tissoires --- drivers/hid/hid-picolcd_fb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hid/hid-picolcd_fb.c b/drivers/hid/hid-picolcd_fb.c index dabcd054dad9..d726aaafb146 100644 --- a/drivers/hid/hid-picolcd_fb.c +++ b/drivers/hid/hid-picolcd_fb.c @@ -527,7 +527,6 @@ int picolcd_init_framebuffer(struct picolcd_data *data) info->var = picolcdfb_var; info->fix = picolcdfb_fix; info->fix.smem_len = PICOLCDFB_SIZE*8; - info->flags = FBINFO_FLAG_DEFAULT; fbdata = info->par; spin_lock_init(>lock); -- 2.41.0
[PATCH 15/17] fbdev/atafb: Remove flag FBINFO_FLAG_DEFAULT
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by a static declaration. So do not set it. Flags should signal differences from the default values. After cleaning up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Helge Deller --- drivers/video/fbdev/atafb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/fbdev/atafb.c b/drivers/video/fbdev/atafb.c index 2bc4089865e6..c4a420b791b9 100644 --- a/drivers/video/fbdev/atafb.c +++ b/drivers/video/fbdev/atafb.c @@ -3112,7 +3112,6 @@ static int __init atafb_probe(struct platform_device *pdev) #ifdef ATAFB_FALCON fb_info.pseudo_palette = current_par.hw.falcon.pseudo_palette; #endif - fb_info.flags = FBINFO_FLAG_DEFAULT; if (!fb_find_mode(_info.var, _info, mode_option, atafb_modedb, NUM_TOTAL_MODES, _modedb[defmode], -- 2.41.0
[PATCH 03/17] fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by kzalloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurences of FBINFO_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Helge Deller Cc: Russell King --- drivers/video/fbdev/controlfb.c | 2 +- drivers/video/fbdev/cyber2000fb.c | 2 +- drivers/video/fbdev/valkyriefb.c | 1 - drivers/video/fbdev/vermilion/vermilion.c | 2 +- drivers/video/fbdev/vt8500lcdfb.c | 3 +-- 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c index 82eeb139c4eb..717134c141ff 100644 --- a/drivers/video/fbdev/controlfb.c +++ b/drivers/video/fbdev/controlfb.c @@ -775,7 +775,7 @@ static void __init control_init_info(struct fb_info *info, struct fb_info_contro info->par = >par; info->fbops = _ops; info->pseudo_palette = p->pseudo_palette; -info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN; + info->flags = FBINFO_HWACCEL_YPAN; info->screen_base = p->frame_buffer + CTRLFB_OFF; fb_alloc_cmap(>cmap, 256, 0); diff --git a/drivers/video/fbdev/cyber2000fb.c b/drivers/video/fbdev/cyber2000fb.c index 38c0a6866d76..98ea56a9abf1 100644 --- a/drivers/video/fbdev/cyber2000fb.c +++ b/drivers/video/fbdev/cyber2000fb.c @@ -1459,7 +1459,7 @@ static struct cfb_info *cyberpro_alloc_fb_info(unsigned int id, char *name) cfb->fb.var.accel_flags = FB_ACCELF_TEXT; cfb->fb.fbops = _ops; - cfb->fb.flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN; + cfb->fb.flags = FBINFO_HWACCEL_YPAN; cfb->fb.pseudo_palette = cfb->pseudo_palette; spin_lock_init(>reg_b0_lock); diff --git a/drivers/video/fbdev/valkyriefb.c b/drivers/video/fbdev/valkyriefb.c index b166b7cfe0e5..fd4488777032 100644 --- a/drivers/video/fbdev/valkyriefb.c +++ b/drivers/video/fbdev/valkyriefb.c @@ -535,7 +535,6 @@ static int __init valkyrie_init_info(struct fb_info *info, { info->fbops = _ops; info->screen_base = 
p->frame_buffer + 0x1000; - info->flags = FBINFO_DEFAULT; info->pseudo_palette = p->pseudo_palette; info->par = >par; return fb_alloc_cmap(>cmap, 256, 0); diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c index 32e74e02a02f..71584c775efd 100644 --- a/drivers/video/fbdev/vermilion/vermilion.c +++ b/drivers/video/fbdev/vermilion/vermilion.c @@ -477,7 +477,7 @@ static int vml_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) } info = >info; - info->flags = FBINFO_DEFAULT | FBINFO_PARTIAL_PAN_OK; + info->flags = FBINFO_PARTIAL_PAN_OK; err = vmlfb_enable_mmio(par); if (err) diff --git a/drivers/video/fbdev/vt8500lcdfb.c b/drivers/video/fbdev/vt8500lcdfb.c index 31d4e85b220c..42d39a9d5130 100644 --- a/drivers/video/fbdev/vt8500lcdfb.c +++ b/drivers/video/fbdev/vt8500lcdfb.c @@ -300,8 +300,7 @@ static int vt8500lcd_probe(struct platform_device *pdev) fbi->fb.var.vmode = FB_VMODE_NONINTERLACED; fbi->fb.fbops = _ops; - fbi->fb.flags = FBINFO_DEFAULT - | FBINFO_HWACCEL_COPYAREA + fbi->fb.flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_YPAN | FBINFO_VIRTFB -- 2.41.0
[PATCH 11/17] media: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by kzalloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Andy Walls Cc: Mauro Carvalho Chehab Cc: Hans Verkuil --- drivers/media/pci/ivtv/ivtvfb.c | 1 - drivers/media/test-drivers/vivid/vivid-osd.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/media/pci/ivtv/ivtvfb.c b/drivers/media/pci/ivtv/ivtvfb.c index 0aeb9daaee4c..23c8c094e791 100644 --- a/drivers/media/pci/ivtv/ivtvfb.c +++ b/drivers/media/pci/ivtv/ivtvfb.c @@ -1048,7 +1048,6 @@ static int ivtvfb_init_vidmode(struct ivtv *itv) /* Generate valid fb_info */ oi->ivtvfb_info.node = -1; - oi->ivtvfb_info.flags = FBINFO_FLAG_DEFAULT; oi->ivtvfb_info.par = itv; oi->ivtvfb_info.var = oi->ivtvfb_defined; oi->ivtvfb_info.fix = oi->ivtvfb_fix; diff --git a/drivers/media/test-drivers/vivid/vivid-osd.c b/drivers/media/test-drivers/vivid/vivid-osd.c index ec25edc679b3..051f1805a16d 100644 --- a/drivers/media/test-drivers/vivid/vivid-osd.c +++ b/drivers/media/test-drivers/vivid/vivid-osd.c @@ -310,7 +310,6 @@ static int vivid_fb_init_vidmode(struct vivid_dev *dev) /* Generate valid fb_info */ dev->fb_info.node = -1; - dev->fb_info.flags = FBINFO_FLAG_DEFAULT; dev->fb_info.par = dev; dev->fb_info.var = dev->fb_defined; dev->fb_info.fix = dev->fb_fix; -- 2.41.0
[PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag
FBINFO_FLAG_DEFAULT is a flag for a framebuffer in struct fb_info. Flags for videomodes are prefixed with FB_MODE_. FBINFO_FLAG_DEFAULT is 0 and the static declaration already clears the memory area of sh7763fb_videomode. So remove the assignment. Signed-off-by: Thomas Zimmermann Cc: Yoshinori Sato Cc: Rich Felker Cc: John Paul Adrian Glaubitz --- arch/sh/boards/mach-sh7763rdp/setup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/boards/mach-sh7763rdp/setup.c b/arch/sh/boards/mach-sh7763rdp/setup.c index 97e715e4e9b3..345f2b76c85a 100644 --- a/arch/sh/boards/mach-sh7763rdp/setup.c +++ b/arch/sh/boards/mach-sh7763rdp/setup.c @@ -119,7 +119,6 @@ static struct fb_videomode sh7763fb_videomode = { .vsync_len = 1, .sync = 0, .vmode = FB_VMODE_NONINTERLACED, - .flag = FBINFO_FLAG_DEFAULT, }; static struct sh7760fb_platdata sh7763fb_def_pdata = { -- 2.41.0
[PATCH 02/17] fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by a static declaration. So do not set it. Flags should signal differences from the default values. After cleaning up all occurences of FBINFO_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Helge Deller --- drivers/video/fbdev/68328fb.c | 2 +- drivers/video/fbdev/acornfb.c | 2 +- drivers/video/fbdev/g364fb.c | 2 +- drivers/video/fbdev/hpfb.c | 1 - drivers/video/fbdev/macfb.c| 1 - drivers/video/fbdev/maxinefb.c | 1 - 6 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/video/fbdev/68328fb.c b/drivers/video/fbdev/68328fb.c index 07d6e8dc686b..956dd2399cc0 100644 --- a/drivers/video/fbdev/68328fb.c +++ b/drivers/video/fbdev/68328fb.c @@ -448,7 +448,7 @@ static int __init mc68x328fb_init(void) fb_info.var.red.offset = fb_info.var.green.offset = fb_info.var.blue.offset = 0; } fb_info.pseudo_palette = _pseudo_palette; - fb_info.flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN; + fb_info.flags = FBINFO_HWACCEL_YPAN; if (fb_alloc_cmap(_info.cmap, 256, 0)) return -ENOMEM; diff --git a/drivers/video/fbdev/acornfb.c b/drivers/video/fbdev/acornfb.c index 1b72edc01cfb..8fec21dfca09 100644 --- a/drivers/video/fbdev/acornfb.c +++ b/drivers/video/fbdev/acornfb.c @@ -694,7 +694,7 @@ static void acornfb_init_fbinfo(void) first = 0; fb_info.fbops = _ops; - fb_info.flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN; + fb_info.flags = FBINFO_HWACCEL_YPAN; fb_info.pseudo_palette = current_par.pseudo_palette; strcpy(fb_info.fix.id, "Acorn"); diff --git a/drivers/video/fbdev/g364fb.c b/drivers/video/fbdev/g364fb.c index c5b7673ddc6c..0825cbde116e 100644 --- a/drivers/video/fbdev/g364fb.c +++ b/drivers/video/fbdev/g364fb.c @@ -219,7 +219,7 @@ int __init g364fb_init(void) fb_info.screen_base = (char *) G364_MEM_BASE; /* virtual kernel address */ fb_info.var = fb_var; fb_info.fix = fb_fix; - fb_info.flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN; + fb_info.flags = 
FBINFO_HWACCEL_YPAN; fb_alloc_cmap(_info.cmap, 255, 0); diff --git a/drivers/video/fbdev/hpfb.c b/drivers/video/fbdev/hpfb.c index 77fbff47b1a8..406c1383cbda 100644 --- a/drivers/video/fbdev/hpfb.c +++ b/drivers/video/fbdev/hpfb.c @@ -287,7 +287,6 @@ static int hpfb_init_one(unsigned long phys_base, unsigned long virt_base) else strcat(fb_info.fix.id, "Catseye"); fb_info.fbops = _ops; - fb_info.flags = FBINFO_DEFAULT; fb_info.var = hpfb_defined; fb_info.screen_base = (char *)fb_start; diff --git a/drivers/video/fbdev/macfb.c b/drivers/video/fbdev/macfb.c index 44ff860a3f37..5ca208d992cc 100644 --- a/drivers/video/fbdev/macfb.c +++ b/drivers/video/fbdev/macfb.c @@ -876,7 +876,6 @@ static int __init macfb_init(void) fb_info.var = macfb_defined; fb_info.fix = macfb_fix; fb_info.pseudo_palette = pseudo_palette; - fb_info.flags = FBINFO_DEFAULT; err = fb_alloc_cmap(_info.cmap, video_cmap_len, 0); if (err) diff --git a/drivers/video/fbdev/maxinefb.c b/drivers/video/fbdev/maxinefb.c index 4e6b05232ae2..0ac1873b2acb 100644 --- a/drivers/video/fbdev/maxinefb.c +++ b/drivers/video/fbdev/maxinefb.c @@ -155,7 +155,6 @@ int __init maxinefb_init(void) fb_info.screen_base = (char *)maxinefb_fix.smem_start; fb_info.var = maxinefb_defined; fb_info.fix = maxinefb_fix; - fb_info.flags = FBINFO_DEFAULT; fb_alloc_cmap(_info.cmap, 256, 0); -- 2.41.0
[PATCH 01/17] drm: Remove flag FBINFO_DEFAULT from fbdev emulation
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurences of FBINFO_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Patrik Jakobsson Cc: Alex Deucher Cc: "Christian König" Cc: "Pan, Xinhui" --- drivers/gpu/drm/drm_fbdev_dma.c | 1 - drivers/gpu/drm/drm_fbdev_generic.c | 1 - drivers/gpu/drm/gma500/fbdev.c| 2 +- drivers/gpu/drm/radeon/radeon_fbdev.c | 2 +- 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c index 8217f1ddc007..bc5fdb1da6a3 100644 --- a/drivers/gpu/drm/drm_fbdev_dma.c +++ b/drivers/gpu/drm/drm_fbdev_dma.c @@ -123,7 +123,6 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper, drm_fb_helper_fill_info(info, fb_helper, sizes); info->fbops = _fbdev_dma_fb_ops; - info->flags = FBINFO_DEFAULT; /* screen */ info->flags |= FBINFO_VIRTFB; /* system memory */ diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c index 98ae703848a0..8a5600b33e10 100644 --- a/drivers/gpu/drm/drm_fbdev_generic.c +++ b/drivers/gpu/drm/drm_fbdev_generic.c @@ -109,7 +109,6 @@ static int drm_fbdev_generic_helper_fb_probe(struct drm_fb_helper *fb_helper, drm_fb_helper_fill_info(info, fb_helper, sizes); info->fbops = _fbdev_generic_fb_ops; - info->flags = FBINFO_DEFAULT; /* screen */ info->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST; diff --git a/drivers/gpu/drm/gma500/fbdev.c b/drivers/gpu/drm/gma500/fbdev.c index 955cbe9f05a7..b09a3ef770d4 100644 --- a/drivers/gpu/drm/gma500/fbdev.c +++ b/drivers/gpu/drm/gma500/fbdev.c @@ -215,7 +215,7 @@ static int psb_fbdev_fb_probe(struct drm_fb_helper *fb_helper, } info->fbops = _fbdev_fb_ops; - info->flags = FBINFO_DEFAULT; + /* Accessed stolen memory directly */ info->screen_base = dev_priv->vram_addr + backing->offset; 
info->screen_size = size; diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c index ab9c1abbac97..c632ca03032b 100644 --- a/drivers/gpu/drm/radeon/radeon_fbdev.c +++ b/drivers/gpu/drm/radeon/radeon_fbdev.c @@ -253,7 +253,7 @@ static int radeon_fbdev_fb_helper_fb_probe(struct drm_fb_helper *fb_helper, } info->fbops = _fbdev_fb_ops; - info->flags = FBINFO_DEFAULT; + /* radeon resume is fragile and needs a vt switch to help it along */ info->skip_vt_switch = false; -- 2.41.0
[PATCH 12/17] staging: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurences of FBINFO_FLAG_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Greg Kroah-Hartman Cc: Sudip Mukherjee Cc: Teddy Wang --- drivers/staging/fbtft/fbtft-core.c | 2 +- drivers/staging/sm750fb/sm750.c| 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index 3a4abf3bae40..eac1d570f437 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -684,7 +684,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display, info->var.transp.offset = 0; info->var.transp.length = 0; - info->flags = FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB; + info->flags = FBINFO_VIRTFB; par = info->par; par->info = info; diff --git a/drivers/staging/sm750fb/sm750.c b/drivers/staging/sm750fb/sm750.c index c260f73cf570..79bcd5bd4938 100644 --- a/drivers/staging/sm750fb/sm750.c +++ b/drivers/staging/sm750fb/sm750.c @@ -807,7 +807,6 @@ static int lynxfb_set_fbinfo(struct fb_info *info, int index) info->screen_base = crtc->v_screen; pr_debug("screen_base vaddr = %p\n", info->screen_base); info->screen_size = line_length * var->yres_virtual; - info->flags = FBINFO_FLAG_DEFAULT | 0; /* set info->fix */ fix->type = FB_TYPE_PACKED_PIXELS; -- 2.41.0
[PATCH 04/17] fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by devm_kzalloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurences of FBINFO_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Helge Deller --- drivers/video/fbdev/pxafb.c| 1 - drivers/video/fbdev/sa1100fb.c | 1 - drivers/video/fbdev/wm8505fb.c | 3 +-- drivers/video/fbdev/xilinxfb.c | 1 - 4 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index 2a8b1dea3a67..c8c4677d06b4 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -1826,7 +1826,6 @@ static struct pxafb_info *pxafb_init_fbinfo(struct device *dev, fbi->fb.var.vmode = FB_VMODE_NONINTERLACED; fbi->fb.fbops = _ops; - fbi->fb.flags = FBINFO_DEFAULT; fbi->fb.node= -1; addr = fbi; diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c index a2408bf00ca0..3d76ce111488 100644 --- a/drivers/video/fbdev/sa1100fb.c +++ b/drivers/video/fbdev/sa1100fb.c @@ -1089,7 +1089,6 @@ static struct sa1100fb_info *sa1100fb_init_fbinfo(struct device *dev) fbi->fb.var.vmode = FB_VMODE_NONINTERLACED; fbi->fb.fbops = _ops; - fbi->fb.flags = FBINFO_DEFAULT; fbi->fb.monspecs= monspecs; fbi->fb.pseudo_palette = fbi->pseudo_palette; diff --git a/drivers/video/fbdev/wm8505fb.c b/drivers/video/fbdev/wm8505fb.c index 10a8b1250103..5833147aa43d 100644 --- a/drivers/video/fbdev/wm8505fb.c +++ b/drivers/video/fbdev/wm8505fb.c @@ -285,8 +285,7 @@ static int wm8505fb_probe(struct platform_device *pdev) fbi->fb.fix.accel = FB_ACCEL_NONE; fbi->fb.fbops = _ops; - fbi->fb.flags = FBINFO_DEFAULT - | FBINFO_HWACCEL_COPYAREA + fbi->fb.flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_XPAN | FBINFO_HWACCEL_YPAN diff --git a/drivers/video/fbdev/xilinxfb.c b/drivers/video/fbdev/xilinxfb.c index 2aa3a528277f..768a281a8d2c 100644 --- 
a/drivers/video/fbdev/xilinxfb.c +++ b/drivers/video/fbdev/xilinxfb.c @@ -324,7 +324,6 @@ static int xilinxfb_assign(struct platform_device *pdev, drvdata->info.fix.line_length = pdata->xvirt * BYTES_PER_PIXEL; drvdata->info.pseudo_palette = drvdata->pseudo_palette; - drvdata->info.flags = FBINFO_DEFAULT; drvdata->info.var = xilinx_fb_var; drvdata->info.var.height = pdata->screen_height_mm; drvdata->info.var.width = pdata->screen_width_mm; -- 2.41.0
[PATCH 00/17] fbdev: Remove FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT flags
Remove the unused flags FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT from fbdev and drivers, as briefly discussed at [1]. Both flags may have been useful when fbdev had special handling for driver modules. With commit 376b3ff54c9a ("fbdev: Nuke FBINFO_MODULE"), they are both 0 and have no further effect. Patches 1 to 7 remove FBINFO_DEFAULT from drivers. Patches 2 to 5 split this by the way the fb_info struct is being allocated. All flags are cleared to zero during the allocation. Patches 8 to 16 do the same for FBINFO_FLAG_DEFAULT. Patch 8 fixes an actual bug in how arch/sh uses the token for struct fb_videomode, which is unrelated. Patch 17 removes both flag constants from <linux/fb.h>. [1] https://lore.kernel.org/dri-devel/877crer8fm@minerva.mail-host-address-is-not-set/ Thomas Zimmermann (17): drm: Remove flag FBINFO_DEFAULT from fbdev emulation fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers fbdev/fsl-diu-fb: Remove flag FBINFO_DEFAULT vfio-mdev: Remove flag FBINFO_DEFAULT from fbdev sample driver arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag auxdisplay: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers hid/picolcd: Remove flag FBINFO_FLAG_DEFAULT from fbdev driver media: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers staging: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers fbdev: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers fbdev: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers fbdev/atafb: Remove flag FBINFO_FLAG_DEFAULT fbdev/pxafb: Remove flag FBINFO_FLAG_DEFAULT fbdev: Remove FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT arch/sh/boards/mach-sh7763rdp/setup.c | 1 - drivers/auxdisplay/cfag12864bfb.c | 1 - drivers/auxdisplay/ht16k33.c | 1 - drivers/gpu/drm/drm_fbdev_dma.c| 1 - drivers/gpu/drm/drm_fbdev_generic.c| 1 - drivers/gpu/drm/gma500/fbdev.c | 2 +- drivers/gpu/drm/radeon/radeon_fbdev.c 
| 2 +- drivers/hid/hid-picolcd_fb.c | 1 - drivers/media/pci/ivtv/ivtvfb.c| 1 - drivers/media/test-drivers/vivid/vivid-osd.c | 1 - drivers/staging/fbtft/fbtft-core.c | 2 +- drivers/staging/sm750fb/sm750.c| 1 - drivers/video/fbdev/68328fb.c | 2 +- drivers/video/fbdev/acornfb.c | 2 +- drivers/video/fbdev/amba-clcd.c| 1 - drivers/video/fbdev/amifb.c| 5 ++--- drivers/video/fbdev/arcfb.c| 1 - drivers/video/fbdev/asiliantfb.c | 1 - drivers/video/fbdev/atafb.c| 1 - drivers/video/fbdev/atmel_lcdfb.c | 2 +- drivers/video/fbdev/aty/aty128fb.c | 1 - drivers/video/fbdev/aty/atyfb_base.c | 3 +-- drivers/video/fbdev/aty/radeon_base.c | 3 +-- drivers/video/fbdev/broadsheetfb.c | 2 +- drivers/video/fbdev/bw2.c | 1 - drivers/video/fbdev/carminefb.c| 1 - drivers/video/fbdev/cg14.c | 2 +- drivers/video/fbdev/cg3.c | 1 - drivers/video/fbdev/cg6.c | 2 +- drivers/video/fbdev/chipsfb.c | 1 - drivers/video/fbdev/cirrusfb.c | 3 +-- drivers/video/fbdev/clps711x-fb.c | 1 - drivers/video/fbdev/cobalt_lcdfb.c | 1 - drivers/video/fbdev/controlfb.c| 2 +- drivers/video/fbdev/cyber2000fb.c | 2 +- drivers/video/fbdev/da8xx-fb.c | 1 - drivers/video/fbdev/efifb.c| 1 - drivers/video/fbdev/ep93xx-fb.c| 1 - drivers/video/fbdev/ffb.c | 3 +-- drivers/video/fbdev/fm2fb.c| 1 - drivers/video/fbdev/fsl-diu-fb.c | 2 +- drivers/video/fbdev/g364fb.c | 2 +- drivers/video/fbdev/gbefb.c| 1 - drivers/video/fbdev/geode/gx1fb_core.c | 1 - drivers/video/fbdev/geode/gxfb_core.c | 1 - drivers/video/fbdev/geode/lxfb_core.c | 1 - drivers/video/fbdev/goldfishfb.c | 1 - drivers/video/fbdev/grvga.c| 2 +- drivers/video/fbdev/gxt4500.c | 3 +-- drivers/video/fbdev/hecubafb.c | 2 +- drivers/video/fbdev/hgafb.c| 2 +- drivers/video/fbdev/hitfb.c| 2 +- drivers/video/fbdev/hpfb.c | 1 - drivers/video/fbdev/hyperv_fb.c| 2 -- drivers/video/fbdev/i740fb.c | 2 +- drivers/video/fbdev/i810/i810_main.c | 4 ++-- drivers/video/fbdev/imsttfb.c | 3 +-- drivers/video/fbdev/imxfb.c| 3 +-- drivers/video/fbdev/intelfb/intelfbdrv.c | 5
[PATCH 06/17] fbdev/fsl-diu-fb: Remove flag FBINFO_DEFAULT
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by dmam_alloc_coherent(__GFP_ZERO). So do not set it. Flags should signal differences from the default values. After cleaning up all occurrences of FBINFO_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Timur Tabi Cc: Helge Deller --- drivers/video/fbdev/fsl-diu-fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c index 785eb8a06943..c62b48f27ba9 100644 --- a/drivers/video/fbdev/fsl-diu-fb.c +++ b/drivers/video/fbdev/fsl-diu-fb.c @@ -1476,7 +1476,7 @@ static int install_fb(struct fb_info *info) info->var.activate = FB_ACTIVATE_NOW; info->fbops = _diu_ops; - info->flags = FBINFO_DEFAULT | FBINFO_VIRTFB | FBINFO_PARTIAL_PAN_OK | + info->flags = FBINFO_VIRTFB | FBINFO_PARTIAL_PAN_OK | FBINFO_READS_FAST; info->pseudo_palette = mfbi->pseudo_palette; -- 2.41.0
[PATCH 07/17] vfio-mdev: Remove flag FBINFO_DEFAULT from fbdev sample driver
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do not set it. Flags should signal differences from the default values. After cleaning up all occurrences of FBINFO_DEFAULT, the token can be removed. Signed-off-by: Thomas Zimmermann Cc: Kirti Wankhede --- samples/vfio-mdev/mdpy-fb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c index 3c8001b9e407..cda477b28685 100644 --- a/samples/vfio-mdev/mdpy-fb.c +++ b/samples/vfio-mdev/mdpy-fb.c @@ -162,7 +162,6 @@ static int mdpy_fb_probe(struct pci_dev *pdev, } info->fbops = _fb_ops; - info->flags = FBINFO_DEFAULT; info->pseudo_palette = par->palette; ret = register_framebuffer(info); -- 2.41.0
[PATCH v2 10/10] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document affinity_domain_via_partition sysfs interface file
Add details of the new hv-gpci interface file called "affinity_domain_via_partition" in the ABI documentation. Signed-off-by: Kajol Jain --- .../sysfs-bus-event_source-devices-hv_gpci| 32 +++ 1 file changed, 32 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index d8e65b93d1f7..b03b2bd4b081 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -208,3 +208,35 @@ Description: admin read only more information. * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/affinity_domain_via_partition +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + AFFINITY_DOMAIN_INFORMATION_BY_PARTITION(0xB1). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCESS", + "H_AUTHORITY" and "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY", can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer getPerfCountInfo documentation for + more information. 
+ + * "-EFBIG" : System information exceeds PAGE_SIZE. -- 2.31.1
[PATCH v2 09/10] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via partition information
The hcall H_GET_PERF_COUNTER_INFO with counter request value AFFINITY_DOMAIN_INFORMATION_BY_PARTITION(0xB1) can be used to get the system affinity domain via partition information. To expose the system affinity domain via partition information, this patch adds a sysfs file called "affinity_domain_via_partition" to the "/sys/devices/hv_gpci/interface/" directory of the hv_gpci pmu driver. Add a new entry for AFFINITY_DOMAIN_VIA_PAR in the sysinfo_counter_request array, which points to the counter request value "affinity_domain_via_partition" in the hv-gpci.c file. Also add a new function called "affinity_domain_via_partition_result_parse" to parse the hcall result and store it in the output buffer. The affinity_domain_via_partition sysfs file is only available for power10 and above platforms. Add a macro called INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR, which points to the index of the NULL placeholder for the affinity_domain_via_partition attribute in the interface_attrs array. Also update the value of the INTERFACE_NULL_ATTR macro in the hv-gpci.c file. 
Signed-off-by: Kajol Jain --- arch/powerpc/perf/hv-gpci.c | 160 +++- 1 file changed, 159 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 326b758df7c8..f2fff166290b 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -107,7 +107,8 @@ static ssize_t cpumask_show(struct device *dev, #define INTERFACE_PROCESSOR_CONFIG_ATTR7 #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8 #define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9 -#define INTERFACE_NULL_ATTR10 +#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR 10 +#define INTERFACE_NULL_ATTR11 /* Counter request value to retrieve system information */ enum { @@ -115,6 +116,7 @@ enum { PROCESSOR_CONFIG, AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */ + AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */ }; static int sysinfo_counter_request[] = { @@ -122,6 +124,7 @@ static int sysinfo_counter_request[] = { [PROCESSOR_CONFIG] = 0x90, [AFFINITY_DOMAIN_VIA_VP] = 0xA0, [AFFINITY_DOMAIN_VIA_DOM] = 0xB0, + [AFFINITY_DOMAIN_VIA_PAR] = 0xB1, }; static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); @@ -458,6 +461,152 @@ static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device return ret; } +static void affinity_domain_via_partition_result_parse(int returned_values, + int element_size, char *buf, size_t *last_element, + size_t *n, struct hv_gpci_request_buffer *arg) +{ + size_t i = 0, j = 0; + size_t k, l, m; + uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele; + + /* +* hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' +* to show the total number of counter_value array elements +* returned via hcall. +* Unlike other request types, the data structure returned by this +* request is variable-size. 
For this counter request type, +* hcall populates 'cv_element_size' corresponds to minimum size of +* the structure returned i.e; the size of the structure with no domain +* information. Below loop go through all counter_value array +* to determine the number and size of each domain array element and +* add it to the output buffer. +*/ + while (i < returned_values) { + k = j; + for (; k < j + element_size; k++) + *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); + *n += sprintf(buf + *n, "\n"); + + total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3]; + size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1]; + + for (l = 0; l < total_affinity_domain_ele; l++) { + for (m = 0; m < size_of_each_affinity_domain_ele; m++) { + *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); + k++; + } + *n += sprintf(buf + *n, "\n"); + } + + *n += sprintf(buf + *n, "\n"); + i++; + j = k; + } + + *last_element = k; +} + +static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + size_t last_element = 0; + u32 starting_index; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* +
[PATCH v2 08/10] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document affinity_domain_via_domain sysfs interface file
Add details of the new hv-gpci interface file called "affinity_domain_via_domain" in the ABI documentation. Signed-off-by: Kajol Jain --- .../sysfs-bus-event_source-devices-hv_gpci| 32 +++ 1 file changed, 32 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index 3b63d66658fe..d8e65b93d1f7 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -176,3 +176,35 @@ Description: admin read only more information. * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/affinity_domain_via_domain +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + AFFINITY_DOMAIN_INFORMATION_BY_DOMAIN(0xB0). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY", can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer getPerfCountInfo documentation for + more information. 
+ + * "-EFBIG" : System information exceeds PAGE_SIZE. -- 2.31.1
[PATCH v2 07/10] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via domain information
The hcall H_GET_PERF_COUNTER_INFO with counter request value as AFFINITY_DOMAIN_INFORMATION_BY_DOMAIN(0XB0), can be used to get the system affinity domain via domain information. To expose the system affinity domain via domain information, patch adds sysfs file called "affinity_domain_via_domain" to the "/sys/devices/hv_gpci/interface/" of hv_gpci pmu driver. Add new entry for AFFINITY_DOMAIN_VIA_DOM in sysinfo_counter_request array, which points to the counter request value "affinity_domain_via_domain" in hv-gpci.c file. The affinity_domain_via_domain sysfs file is only available for power10 and above platforms. Add a macro called INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR, which points to the index of NULL placeholder, for affinity_domain_via_domain attribute in interface_attrs array. Also updated the value of INTERFACE_NULL_ATTR macro in hv-gpci.c file. Signed-off-by: Kajol Jain --- arch/powerpc/perf/hv-gpci.c | 80 - 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 68502cb18262..326b758df7c8 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -106,19 +106,22 @@ static ssize_t cpumask_show(struct device *dev, #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 #define INTERFACE_PROCESSOR_CONFIG_ATTR7 #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8 -#define INTERFACE_NULL_ATTR9 +#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9 +#define INTERFACE_NULL_ATTR10 /* Counter request value to retrieve system information */ enum { PROCESSOR_BUS_TOPOLOGY, PROCESSOR_CONFIG, AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ + AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */ }; static int sysinfo_counter_request[] = { [PROCESSOR_BUS_TOPOLOGY] = 0xD0, [PROCESSOR_CONFIG] = 0x90, [AFFINITY_DOMAIN_VIA_VP] = 0xA0, + [AFFINITY_DOMAIN_VIA_DOM] = 0xB0, }; static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); @@ -389,6 +392,72 
@@ static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, return ret; } +static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* +* Pass the counter request 0xB0 corresponds to request +* type 'Affinity_domain_information_by_domain', +* to retrieve the system affinity domain information. +* starting_index value refers to the starting hardware +* processor index. +*/ + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], + 0, 0, buf, , arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + + /* +* ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which +* implies that buffer can't accommodate all information, and a partial buffer +* returned. To handle that, we need to take subsequent requests +* with next starting index to retrieve additional (missing) data. +* Below loop do subsequent hcalls with next starting index and add it +* to buffer util we get all the information. +*/ + while (ret == H_PARAMETER) { + int returned_values = be16_to_cpu(arg->params.returned_values); + int elementsize = be16_to_cpu(arg->params.cv_element_size); + int last_element = (returned_values - 1) * elementsize; + + /* +* Since the starting index value is part of counter_value +* buffer elements, use the starting index value in the last +* element and add 1 to make subsequent hcalls. 
+*/ + u32 starting_index = arg->bytes[last_element + 1] + + (arg->bytes[last_element] << 8) + 1; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], + starting_index, 0, buf, , arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + } + + return n; + +out: + put_cpu_var(hv_gpci_reqb); + return ret; +} + static DEVICE_ATTR_RO(kernel_version); static DEVICE_ATTR_RO(cpumask); @@ -420,6 +489,11 @@ static struct attribute *interface_attrs[] = { * attribute, set in init function if applicable. */ NULL, +
[PATCH v2 06/10] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document affinity_domain_via_virtual_processor sysfs interface file
Add details of the new hv-gpci interface file called "affinity_domain_via_virtual_processor" in the ABI documentation. Signed-off-by: Kajol Jain --- .../sysfs-bus-event_source-devices-hv_gpci| 32 +++ 1 file changed, 32 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index aff52dc3b05c..3b63d66658fe 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -144,3 +144,35 @@ Description: admin read only more information. * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/affinity_domain_via_virtual_processor +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + AFFINITY_DOMAIN_INFORMATION_BY_VIRTUAL_PROCESSOR(0xA0). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY", can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer getPerfCountInfo documentation for + more information. 
+ + * "-EFBIG" : System information exceeds PAGE_SIZE. -- 2.31.1
[PATCH v2 05/10] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via virtual processor information
The hcall H_GET_PERF_COUNTER_INFO with counter request value as AFFINITY_DOMAIN_INFORMATION_BY_VIRTUAL_PROCESSOR(0XA0), can be used to get the system affinity domain via virtual processor information. To expose the system affinity domain via virtual processor information, patch adds sysfs file called "affinity_domain_via_virtual_processor" to the "/sys/devices/hv_gpci/interface/" of hv_gpci pmu driver. The affinity_domain_via_virtual_processor sysfs file is only available for power10 and above platforms. Add a macro called INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR, which points to the index of NULL placeholder, for affinity_domain_via_virtual_processor attribute in interface_attrs array. Also updated the value of INTERFACE_NULL_ATTR macro in hv-gpci.c file. Signed-off-by: Kajol Jain --- arch/powerpc/perf/hv-gpci.c | 86 - 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index c74076d3c7a7..68502cb18262 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -105,17 +105,20 @@ static ssize_t cpumask_show(struct device *dev, /* Interface attribute array index to store system information */ #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 #define INTERFACE_PROCESSOR_CONFIG_ATTR7 -#define INTERFACE_NULL_ATTR8 +#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8 +#define INTERFACE_NULL_ATTR9 /* Counter request value to retrieve system information */ enum { PROCESSOR_BUS_TOPOLOGY, - PROCESSOR_CONFIG + PROCESSOR_CONFIG, + AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ }; static int sysinfo_counter_request[] = { [PROCESSOR_BUS_TOPOLOGY] = 0xD0, [PROCESSOR_CONFIG] = 0x90, + [AFFINITY_DOMAIN_VIA_VP] = 0xA0, }; static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); @@ -316,6 +319,76 @@ static ssize_t processor_config_show(struct device *dev, struct device_attribute return ret; } +static ssize_t 
affinity_domain_via_virtual_processor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* +* Pass the counter request 0xA0 corresponds to request +* type 'Affinity_domain_information_by_virutal_processor', +* to retrieve the system affinity domain information. +* starting_index value refers to the starting hardware +* processor index. +*/ + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], + 0, 0, buf, , arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + + /* +* ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which +* implies that buffer can't accommodate all information, and a partial buffer +* returned. To handle that, we need to take subsequent requests +* with next secondary index to retrieve additional (missing) data. +* Below loop do subsequent hcalls with next secondary index and add it +* to buffer util we get all the information. +*/ + while (ret == H_PARAMETER) { + int returned_values = be16_to_cpu(arg->params.returned_values); + int elementsize = be16_to_cpu(arg->params.cv_element_size); + int last_element = (returned_values - 1) * elementsize; + + /* +* Since the starting index and secondary index type is part of the +* counter_value buffer elements, use the starting index value in the +* last array element as subsequent starting index, and use secondary index +* value in the last array element plus 1 as subsequent secondary index. +* For counter request '0xA0', starting index points to partition id +* and secondary index points to corresponding virtual processor index. 
+*/ + u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8); + u16 secondary_index = arg->bytes[last_element + 3] + + (arg->bytes[last_element + 2] << 8) + 1; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], + starting_index, secondary_index, buf, , arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + } + + return n; + +out: +
[PATCH v2 04/10] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document processor_config sysfs interface file
Add details of the new hv-gpci interface file called "processor_config" in the ABI documentation. Signed-off-by: Kajol Jain --- .../sysfs-bus-event_source-devices-hv_gpci| 32 +++ 1 file changed, 32 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index 2eeeab9a20fa..aff52dc3b05c 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -112,3 +112,35 @@ Description: admin read only more information. * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/processor_config +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + PROCESSOR_CONFIG(0x90). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY", can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer getPerfCountInfo documentation for + more information. + + * "-EFBIG" : System information exceeds PAGE_SIZE. -- 2.31.1
[PATCH v2 03/10] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show processor config information
The hcall H_GET_PERF_COUNTER_INFO with counter request value as PROCESSOR_CONFIG(0X90), can be used to get the system processor configuration information. To expose the system processor config information, patch adds sysfs file called "processor_config" to the "/sys/devices/hv_gpci/interface/" of hv_gpci pmu driver. Add enum and sysinfo_counter_request array to get required counter request value in hv-gpci.c file. Also add a new function called "sysinfo_device_attr_create", which will create and return required device attribute to the add_sysinfo_interface_files function. The processor_config sysfs file is only available for power10 and above platforms. Add a new macro called INTERFACE_PROCESSOR_CONFIG_ATTR, which points to the index of NULL placefolder, for processor_config attribute in the interface_attrs array. Also add macro INTERFACE_NULL_ATTR which points to index of NULL attribute in interface_attrs array. Signed-off-by: Kajol Jain --- arch/powerpc/perf/hv-gpci.c | 168 1 file changed, 153 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 225f148f75fd..c74076d3c7a7 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -102,11 +102,21 @@ static ssize_t cpumask_show(struct device *dev, return cpumap_print_to_pagebuf(true, buf, _gpci_cpumask); } -/* Counter request value to retrieve system information */ -#define PROCESSOR_BUS_TOPOLOGY 0XD0 - /* Interface attribute array index to store system information */ #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 +#define INTERFACE_PROCESSOR_CONFIG_ATTR7 +#define INTERFACE_NULL_ATTR8 + +/* Counter request value to retrieve system information */ +enum { + PROCESSOR_BUS_TOPOLOGY, + PROCESSOR_CONFIG +}; + +static int sysinfo_counter_request[] = { + [PROCESSOR_BUS_TOPOLOGY] = 0xD0, + [PROCESSOR_CONFIG] = 0x90, +}; static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); @@ -187,7 +197,8 @@ static ssize_t 
processor_bus_topology_show(struct device *dev, struct device_att * starting_index value implies the starting hardware * chip id. */ - ret = systeminfo_gpci_request(PROCESSOR_BUS_TOPOLOGY, 0, 0, buf, , arg); + ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], + 0, 0, buf, , arg); if (!ret) return n; @@ -220,8 +231,76 @@ static ssize_t processor_bus_topology_show(struct device *dev, struct device_att memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); - ret = systeminfo_gpci_request(PROCESSOR_BUS_TOPOLOGY, starting_index, - 0, buf, , arg); + ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], + starting_index, 0, buf, , arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + } + + return n; + +out: + put_cpu_var(hv_gpci_reqb); + return ret; +} + +static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* +* Pass the counter request value 0x90 corresponds to request +* type 'Processor_config', to retrieve +* the system processor information. +* starting_index value implies the starting hardware +* processor index. +*/ + ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], + 0, 0, buf, , arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + + /* +* ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which +* implies that buffer can't accommodate all information, and a partial buffer +* returned. To handle that, we need to take subsequent requests +* with next starting index to retrieve additional (missing) data. +* Below loop do subsequent hcalls with next starting index and add it +* to buffer util we get all the information. 
+*/ + while (ret == H_PARAMETER) { + int returned_values = be16_to_cpu(arg->params.returned_values); + int elementsize = be16_to_cpu(arg->params.cv_element_size); + int last_element = (returned_values - 1) * elementsize; + + /* +* Since the starting index is part of counter_value +* buffer elements, use the starting index value in the last +