Re: [PATCH 2/2] PCI/AER: Unexport pci_enable_pcie_error_reporting()

2023-07-10 Thread Christoph Hellwig
Looks good:

Reviewed-by: Christoph Hellwig 


Re: [PATCH 1/2] PCI/AER: Drop unused pci_disable_pcie_error_reporting()

2023-07-10 Thread Christoph Hellwig
Looks good:

Reviewed-by: Christoph Hellwig 


[PATCH v3 6/7] dax/kmem: Always enroll hotplugged memory for memmap_on_memory

2023-07-10 Thread Aneesh Kumar K.V
From: Vishal Verma 

With DAX memory regions originating from CXL memory expanders or
NVDIMMs, the kmem driver may be hot-adding huge amounts of system memory
on a system without enough 'regular' main memory to support the memmap
for it. To avoid this, ensure that all kmem managed hotplugged memory is
added with the MHP_MEMMAP_ON_MEMORY flag to place the memmap on the
new memory region being hot added.

To do this, call add_memory() in chunks of memory_block_size_bytes() as
that is a requirement for memmap_on_memory.

Cc: "Rafael J. Wysocki" 
Cc: Len Brown 
Cc: Andrew Morton 
Cc: David Hildenbrand 
Cc: Oscar Salvador 
Cc: Dan Williams 
Cc: Dave Jiang 
Cc: Dave Hansen 
Cc: Huang Ying 
Signed-off-by: Vishal Verma 
Signed-off-by: Aneesh Kumar K.V 
---
 drivers/dax/kmem.c | 81 +-
 1 file changed, 59 insertions(+), 22 deletions(-)

diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index 898ca9505754..840bf7b40a44 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "dax-private.h"
 #include "bus.h"
 
@@ -105,6 +106,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
data->mgid = rc;
 
for (i = 0; i < dev_dax->nr_range; i++) {
+   u64 cur_start, cur_len, remaining;
struct resource *res;
struct range range;
 
@@ -137,21 +139,42 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
res->flags = IORESOURCE_SYSTEM_RAM;
 
/*
-* Ensure that future kexec'd kernels will not treat
-* this as RAM automatically.
+* Add memory in chunks of memory_block_size_bytes() so that
+* it is considered for MHP_MEMMAP_ON_MEMORY
+* @range has already been aligned to memory_block_size_bytes(),
+* so the following loop will always break it down cleanly.
 */
-   rc = add_memory_driver_managed(data->mgid, range.start,
-   range_len(&range), kmem_name, MHP_NID_IS_MGID);
-
-   if (rc) {
-   dev_warn(dev, "mapping%d: %#llx-%#llx memory add 
failed\n",
-   i, range.start, range.end);
-   remove_resource(res);
-   kfree(res);
-   data->res[i] = NULL;
-   if (mapped)
-   continue;
-   goto err_request_mem;
+   cur_start = range.start;
+   cur_len = memory_block_size_bytes();
+   remaining = range_len(&range);
+   while (remaining) {
+   /*
+* If alignment rules are not satisfied we will
+* fall back to normal memmap allocation.
+*/
+   mhp_t mhp_flags = MHP_NID_IS_MGID | 
MHP_MEMMAP_ON_MEMORY;
+   /*
+* Ensure that future kexec'd kernels will not treat
+* this as RAM automatically.
+*/
+   rc = add_memory_driver_managed(data->mgid, cur_start,
+  cur_len, kmem_name,
+  mhp_flags);
+
+   if (rc) {
+   dev_warn(dev,
+"mapping%d: %#llx-%#llx memory add 
failed\n",
+i, cur_start, cur_start + cur_len - 1);
+   remove_resource(res);
+   kfree(res);
+   data->res[i] = NULL;
+   if (mapped)
+   continue;
+   goto err_request_mem;
+   }
+
+   cur_start += cur_len;
+   remaining -= cur_len;
}
mapped++;
}
@@ -186,25 +209,39 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
 * unbind will succeed even if we return failure.
 */
for (i = 0; i < dev_dax->nr_range; i++) {
+
+   u64 cur_start, cur_len, remaining;
struct range range;
+   bool resource_remove;
int rc;
 
rc = dax_kmem_range(dev_dax, i, &range);
if (rc)
continue;
 
-   rc = remove_memory(range.start, range_len(&range));
-   if (rc == 0) {
+   resource_remove = true;
+   cur_start = range.start;
+   cur_len = memory_block_size_bytes();
+   remaining = range_len(&range);
+   while (remaining) {
+
+   rc = remove_memory(cur_start, cur_len);
+

[PATCH v3 7/7] mm/hotplug: Embed vmem_altmap details in memory block

2023-07-10 Thread Aneesh Kumar K.V
With memmap on memory, some architectures need more details w.r.t. altmap,
such as base_pfn, end_pfn, etc., to unmap vmemmap memory. Instead of
computing them again when we remove a memory block, embed vmem_altmap
details in struct memory_block if we are using the memmap on memory block
feature.

No functional change in this patch

Signed-off-by: Aneesh Kumar K.V 
---
 drivers/base/memory.c  | 35 ++-
 include/linux/memory.h |  8 ++--
 mm/memory_hotplug.c| 34 ++
 3 files changed, 42 insertions(+), 35 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index b456ac213610..10aacaecf8de 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -106,6 +106,10 @@ static void memory_block_release(struct device *dev)
 {
struct memory_block *mem = to_memory_block(dev);
 
+   if (mem->altmap) {
+   WARN(mem->altmap->alloc, "Altmap not fully unmapped");
+   kfree(mem->altmap);
+   }
kfree(mem);
 }
 
@@ -183,7 +187,7 @@ static int memory_block_online(struct memory_block *mem)
 {
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-   unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+   unsigned long nr_vmemmap_pages = 0;
struct zone *zone;
int ret;
 
@@ -200,6 +204,9 @@ static int memory_block_online(struct memory_block *mem)
 * stage helps to keep accounting easier to follow - e.g vmemmaps
 * belong to the same zone as the memory they backed.
 */
+   if (mem->altmap)
+   nr_vmemmap_pages = mem->altmap->alloc + mem->altmap->reserve;
+
if (nr_vmemmap_pages) {
ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, 
zone);
if (ret)
@@ -230,7 +237,7 @@ static int memory_block_offline(struct memory_block *mem)
 {
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-   unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+   unsigned long nr_vmemmap_pages = 0;
int ret;
 
if (!mem->zone)
@@ -240,6 +247,9 @@ static int memory_block_offline(struct memory_block *mem)
 * Unaccount before offlining, such that unpopulated zone and kthreads
 * can properly be torn down in offline_pages().
 */
+   if (mem->altmap)
+   nr_vmemmap_pages = mem->altmap->alloc + mem->altmap->reserve;
+
if (nr_vmemmap_pages)
adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
  -nr_vmemmap_pages);
@@ -726,7 +736,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
 #endif
 
 static int add_memory_block(unsigned long block_id, unsigned long state,
-   unsigned long nr_vmemmap_pages,
+   struct vmem_altmap *altmap,
struct memory_group *group)
 {
struct memory_block *mem;
@@ -744,7 +754,14 @@ static int add_memory_block(unsigned long block_id, 
unsigned long state,
mem->start_section_nr = block_id * sections_per_block;
mem->state = state;
mem->nid = NUMA_NO_NODE;
-   mem->nr_vmemmap_pages = nr_vmemmap_pages;
+   if (altmap) {
+   mem->altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL);
+   if (!mem->altmap) {
+   kfree(mem);
+   return -ENOMEM;
+   }
+   memcpy(mem->altmap, altmap, sizeof(*altmap));
+   }
INIT_LIST_HEAD(&mem->group_next);
 
 #ifndef CONFIG_NUMA
@@ -783,14 +800,14 @@ static int __init add_boot_memory_block(unsigned long 
base_section_nr)
if (section_count == 0)
return 0;
return add_memory_block(memory_block_id(base_section_nr),
-   MEM_ONLINE, 0,  NULL);
+   MEM_ONLINE, NULL,  NULL);
 }
 
 static int add_hotplug_memory_block(unsigned long block_id,
-   unsigned long nr_vmemmap_pages,
+   struct vmem_altmap *altmap,
struct memory_group *group)
 {
-   return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group);
+   return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
 }
 
 static void remove_memory_block(struct memory_block *memory)
@@ -818,7 +835,7 @@ static void remove_memory_block(struct memory_block *memory)
  * Called under device_hotplug_lock.
  */
 int create_memory_block_devices(unsigned long start, unsigned long size,
-   unsigned long vmemmap_pages,
+   struct vmem_altmap *altmap,
struct memory_group *group)
 {
const 

[PATCH v3 5/7] powerpc/book3s64/memhotplug: Enable memmap on memory for radix

2023-07-10 Thread Aneesh Kumar K.V
Radix vmemmap mapping can map things correctly at the PMD level or PTE
level based on different device boundary checks. Hence we skip the
restrictions w.r.t vmemmap size to be multiple of PMD_SIZE. This also
makes the feature widely useful because to use PMD_SIZE vmemmap area we
require a memory block size of 2GiB

We can also use MHP_RESERVE_PAGES_MEMMAP_ON_MEMORY so that the feature
can work with a memory block size of 256MB. Using altmap.reserve feature
to align things correctly at pageblock granularity. We can end up
losing some pages in memory with this. For ex: with a 256MiB memory block
size, we require 4 pages to map vmemmap pages, In order to align things
correctly we end up adding a reserve of 28 pages. ie, for every 4096
pages 28 pages get reserved.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/include/asm/pgtable.h| 28 +++
 .../platforms/pseries/hotplug-memory.c|  3 +-
 mm/memory_hotplug.c   |  2 ++
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 116d6add0bb0..f890907e5bbf 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -157,6 +157,7 @@ config PPC
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
+   select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if PPC_RADIX_MMU
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 68817ea7f994..8e6c92dde6ad 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -169,6 +169,34 @@ static inline bool is_ioremap_addr(const void *x)
 int __meminit vmemmap_populated(unsigned long vmemmap_addr, int 
vmemmap_map_size);
 bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
   unsigned long page_size);
+/*
+ * mm/memory_hotplug.c:mhp_supports_memmap_on_memory goes into details
+ * some of the restrictions. We don't check for PMD_SIZE because our
+ * vmemmap allocation code can fallback correctly. The pageblock
+ * alignment requirement is met using altmap->reserve blocks.
+ */
+#define arch_supports_memmap_on_memory arch_supports_memmap_on_memory
+static inline bool arch_supports_memmap_on_memory(unsigned long size)
+{
+   unsigned long nr_pages = size >> PAGE_SHIFT;
+   unsigned long vmemmap_size = nr_pages * sizeof(struct page);
+
+   if (!radix_enabled())
+   return false;
+
+#ifdef CONFIG_PPC_4K_PAGES
+   return IS_ALIGNED(vmemmap_size, PMD_SIZE);
+#else
+   /*
+* Make sure the vmemmap allocation is fully contained
+* so that we always allocate vmemmap memory from altmap area.
+* The pageblock alignment requirement is met by using
+* reserve blocks in altmap.
+*/
+   return IS_ALIGNED(vmemmap_size,  PAGE_SIZE);
+#endif
+}
+
 #endif /* CONFIG_PPC64 */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 9c62c2c3b3d0..1447509357a7 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -617,6 +617,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, 
u32 drc_index)
 
 static int dlpar_add_lmb(struct drmem_lmb *lmb)
 {
+   mhp_t mhp_flags = MHP_NONE | MHP_MEMMAP_ON_MEMORY;
unsigned long block_sz;
int nid, rc;
 
@@ -637,7 +638,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
nid = first_online_node;
 
/* Add the memory */
-   rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_NONE);
+   rc = __add_memory(nid, lmb->base_addr, block_sz, mhp_flags);
if (rc) {
invalidate_lmb_associativity_index(lmb);
return rc;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f36aec1f7626..0c4d3fdd31a2 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -2108,6 +2108,8 @@ static int __ref try_remove_memory(u64 start, u64 size)
 * right thing if we used vmem_altmap when hot-adding
 * the range.
 */
+   mhp_altmap.base_pfn = PHYS_PFN(start);
+   mhp_altmap.free = PHYS_PFN(size) - nr_vmemmap_pages;
mhp_altmap.alloc = nr_vmemmap_pages;
altmap = &mhp_altmap;
}
-- 
2.41.0



[PATCH v3 4/7] mm/hotplug: Allow pageblock alignment via altmap reservation

2023-07-10 Thread Aneesh Kumar K.V
Add a new kconfig option that can be selected if we want to allow
pageblock alignment by reserving pages in the vmemmap altmap area.
This implies we will be reserving some pages for every memory block.
This also allows the memmap on memory feature to be widely useful
with different memory block size values.

Signed-off-by: Aneesh Kumar K.V 
---
 mm/Kconfig  |  9 +++
 mm/memory_hotplug.c | 59 +
 2 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index 932349271e28..88a1472b2086 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -570,6 +570,15 @@ config MHP_MEMMAP_ON_MEMORY
depends on MEMORY_HOTPLUG && SPARSEMEM_VMEMMAP
depends on ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
 
+config MHP_RESERVE_PAGES_MEMMAP_ON_MEMORY
+   bool "Allow Reserving pages for page block aligment"
+   depends on MHP_MEMMAP_ON_MEMORY
+   help
+   This option allows memmap on memory feature to be more useful
+   with different memory block sizes. This is achieved by marking some 
pages
+   in each memory block as reserved so that we can get page-block alignment
+   for the remaining pages.
+
 endif # MEMORY_HOTPLUG
 
 config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 07c99b0cc371..f36aec1f7626 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1252,15 +1252,17 @@ static inline bool 
arch_supports_memmap_on_memory(unsigned long size)
 {
unsigned long nr_vmemmap_pages = size >> PAGE_SHIFT;
unsigned long vmemmap_size = nr_vmemmap_pages * sizeof(struct page);
-   unsigned long remaining_size = size - vmemmap_size;
 
-   return IS_ALIGNED(vmemmap_size, PMD_SIZE) &&
-   IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT));
+   return IS_ALIGNED(vmemmap_size, PMD_SIZE);
 }
 #endif
 
 static bool mhp_supports_memmap_on_memory(unsigned long size)
 {
+   unsigned long nr_vmemmap_pages = size >> PAGE_SHIFT;
+   unsigned long vmemmap_size = nr_vmemmap_pages * sizeof(struct page);
+   unsigned long remaining_size = size - vmemmap_size;
+
/*
 * Besides having arch support and the feature enabled at runtime, we
 * need a few more assumptions to hold true:
@@ -1287,9 +1289,30 @@ static bool mhp_supports_memmap_on_memory(unsigned long 
size)
 *   altmap as an alternative source of memory, and we do not 
exactly
 *   populate a single PMD.
 */
-   return mhp_memmap_on_memory() &&
-   size == memory_block_size_bytes() &&
-   arch_supports_memmap_on_memory(size);
+   if (!mhp_memmap_on_memory() || size != memory_block_size_bytes())
+   return false;
+/*
+ * Without page reservation remaining pages should be pageblock 
aligned.
+ */
+   if (!IS_ENABLED(CONFIG_MHP_RESERVE_PAGES_MEMMAP_ON_MEMORY) &&
+   !IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT)))
+   return false;
+
+   return arch_supports_memmap_on_memory(size);
+}
+
+static inline unsigned long memory_block_align_base(unsigned long size)
+{
+   if (IS_ENABLED(CONFIG_MHP_RESERVE_PAGES_MEMMAP_ON_MEMORY)) {
+   unsigned long align;
+   unsigned long nr_vmemmap_pages = size >> PAGE_SHIFT;
+   unsigned long vmemmap_size;
+
+   vmemmap_size = (nr_vmemmap_pages * sizeof(struct page)) >> 
PAGE_SHIFT;
+   align = pageblock_align(vmemmap_size) - vmemmap_size;
+   return align;
+   } else
+   return 0;
 }
 
 /*
@@ -1302,7 +1325,11 @@ int __ref add_memory_resource(int nid, struct resource 
*res, mhp_t mhp_flags)
 {
struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
enum memblock_flags memblock_flags = MEMBLOCK_NONE;
-   struct vmem_altmap mhp_altmap = {};
+   struct vmem_altmap mhp_altmap = {
+   .base_pfn =  PHYS_PFN(res->start),
+   .end_pfn  =  PHYS_PFN(res->end),
+   .reserve  = memory_block_align_base(resource_size(res)),
+   };
struct memory_group *group = NULL;
u64 start, size;
bool new_node = false;
@@ -1347,8 +1374,7 @@ int __ref add_memory_resource(int nid, struct resource 
*res, mhp_t mhp_flags)
 */
if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
if (mhp_supports_memmap_on_memory(size)) {
-   mhp_altmap.free = PHYS_PFN(size);
-   mhp_altmap.base_pfn = PHYS_PFN(start);
+   mhp_altmap.free = PHYS_PFN(size) - mhp_altmap.reserve;
params.altmap = &mhp_altmap;
}
/* fallback to not using altmap  */
@@ -1360,7 +1386,7 @@ int __ref add_memory_resource(int nid, struct resource 
*res, mhp_t mhp_flags)
goto error;
 
/* create memory block devices after memory 

[PATCH v3 3/7] mm/hotplug: Allow architecture to override memmap on memory support check

2023-07-10 Thread Aneesh Kumar K.V
Some architectures would want different restrictions. Hence add an
architecture-specific override.

Both the PMD_SIZE check and pageblock alignment check are moved there.

Signed-off-by: Aneesh Kumar K.V 
---
 mm/memory_hotplug.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1b19462f4e72..07c99b0cc371 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1247,12 +1247,20 @@ static int online_memory_block(struct memory_block 
*mem, void *arg)
return device_online(&mem->dev);
 }
 
-static bool mhp_supports_memmap_on_memory(unsigned long size)
+#ifndef arch_supports_memmap_on_memory
+static inline bool arch_supports_memmap_on_memory(unsigned long size)
 {
-   unsigned long nr_vmemmap_pages = size / PAGE_SIZE;
+   unsigned long nr_vmemmap_pages = size >> PAGE_SHIFT;
unsigned long vmemmap_size = nr_vmemmap_pages * sizeof(struct page);
unsigned long remaining_size = size - vmemmap_size;
 
+   return IS_ALIGNED(vmemmap_size, PMD_SIZE) &&
+   IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT));
+}
+#endif
+
+static bool mhp_supports_memmap_on_memory(unsigned long size)
+{
/*
 * Besides having arch support and the feature enabled at runtime, we
 * need a few more assumptions to hold true:
@@ -1280,9 +1288,8 @@ static bool mhp_supports_memmap_on_memory(unsigned long 
size)
 *   populate a single PMD.
 */
return mhp_memmap_on_memory() &&
-  size == memory_block_size_bytes() &&
-  IS_ALIGNED(vmemmap_size, PMD_SIZE) &&
-  IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT));
+   size == memory_block_size_bytes() &&
+   arch_supports_memmap_on_memory(size);
 }
 
 /*
-- 
2.41.0



[PATCH v3 2/7] mm/hotplug: Allow memmap on memory hotplug request to fallback

2023-07-10 Thread Aneesh Kumar K.V
If not supported, fall back to not using memmap on memory. This avoids
the need for callers to do the fallback.

Signed-off-by: Aneesh Kumar K.V 
---
 drivers/acpi/acpi_memhotplug.c |  3 +--
 include/linux/memory_hotplug.h |  1 -
 mm/memory_hotplug.c| 13 ++---
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 24f662d8bd39..d0c1a71007d0 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -211,8 +211,7 @@ static int acpi_memory_enable_device(struct 
acpi_memory_device *mem_device)
if (!info->length)
continue;
 
-   if (mhp_supports_memmap_on_memory(info->length))
-   mhp_flags |= MHP_MEMMAP_ON_MEMORY;
+   mhp_flags |= MHP_MEMMAP_ON_MEMORY;
result = __add_memory(mgid, info->start_addr, info->length,
  mhp_flags);
 
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 013c69753c91..96f6127f197f 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -354,7 +354,6 @@ extern struct zone *zone_for_pfn_range(int online_type, int 
nid,
 extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
  struct mhp_params *params);
 void arch_remove_linear_mapping(u64 start, u64 size);
-extern bool mhp_supports_memmap_on_memory(unsigned long size);
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3f231cf1b410..1b19462f4e72 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1247,7 +1247,7 @@ static int online_memory_block(struct memory_block *mem, 
void *arg)
return device_online(&mem->dev);
 }
 
-bool mhp_supports_memmap_on_memory(unsigned long size)
+static bool mhp_supports_memmap_on_memory(unsigned long size)
 {
unsigned long nr_vmemmap_pages = size / PAGE_SIZE;
unsigned long vmemmap_size = nr_vmemmap_pages * sizeof(struct page);
@@ -1339,13 +1339,12 @@ int __ref add_memory_resource(int nid, struct resource 
*res, mhp_t mhp_flags)
 * Self hosted memmap array
 */
if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
-   if (!mhp_supports_memmap_on_memory(size)) {
-   ret = -EINVAL;
-   goto error;
+   if (mhp_supports_memmap_on_memory(size)) {
+   mhp_altmap.free = PHYS_PFN(size);
+   mhp_altmap.base_pfn = PHYS_PFN(start);
+   params.altmap = &mhp_altmap;
}
-   mhp_altmap.free = PHYS_PFN(size);
-   mhp_altmap.base_pfn = PHYS_PFN(start);
-   params.altmap = &mhp_altmap;
+   /* fallback to not using altmap  */
}
 
/* call arch's memory hotadd */
-- 
2.41.0



[PATCH v3 1/7] mm/hotplug: Simplify ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE kconfig

2023-07-10 Thread Aneesh Kumar K.V
Instead of adding menu entry with all supported architectures, add
mm/Kconfig variable and select the same from supported architectures.

No functional change in this patch.

Acked-by: David Hildenbrand 
Signed-off-by: Aneesh Kumar K.V 
---
 arch/arm64/Kconfig | 4 +---
 arch/x86/Kconfig   | 4 +---
 mm/Kconfig | 3 +++
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7856c3a3e35a..7e5985c018f8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -78,6 +78,7 @@ config ARM64
select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION
select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION
select ARCH_KEEP_MEMBLOCK
+   select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_GNU_PROPERTY
select ARCH_USE_MEMTEST
@@ -346,9 +347,6 @@ config GENERIC_CSUM
 config GENERIC_CALIBRATE_DELAY
def_bool y
 
-config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
-   def_bool y
-
 config SMP
def_bool y
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 78224aa76409..d0258e92a8af 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -102,6 +102,7 @@ config X86
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_ZONE_DMA_SET if EXPERT
select ARCH_HAVE_NMI_SAFE_CMPXCHG
+   select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
@@ -2610,9 +2611,6 @@ config ARCH_HAS_ADD_PAGES
def_bool y
depends on ARCH_ENABLE_MEMORY_HOTPLUG
 
-config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
-   def_bool y
-
 menu "Power management and ACPI options"
 
 config ARCH_HIBERNATION_HEADER
diff --git a/mm/Kconfig b/mm/Kconfig
index 923bd35f81f2..932349271e28 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -572,6 +572,9 @@ config MHP_MEMMAP_ON_MEMORY
 
 endif # MEMORY_HOTPLUG
 
+config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
+   bool
+
 # Heavily threaded applications may benefit from splitting the mm-wide
 # page_table_lock, so that faults on different parts of the user address
 # space can be handled with less contention: split it at this NR_CPUS.
-- 
2.41.0



[PATCH v3 0/7] Add support for memmap on memory feature on ppc64

2023-07-10 Thread Aneesh Kumar K.V
This patch series updates the memmap on memory feature to fall back to
memmap allocation outside the memory block if the alignment rules are
not met. This makes the feature more useful on architectures like
ppc64 where alignment rules are different with 64K page size.

This patch series is dependent on dax vmemmap optimization series
posted here
https://lore.kernel.org/linux-mm/20230710160842.56300-1-aneesh.ku...@linux.ibm.com


Changes from v2:
* Rebase to latest linus tree
* Redo the series based on review feedback. Multiple changes to the patchset.

Changes from v1:
* update the memblock to store vmemmap_altmap details. This is required
so that when we remove the memory we can find the altmap details which
is needed on some architectures.
* rebase to latest linus tree


Aneesh Kumar K.V (6):
  mm/hotplug: Simplify ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE kconfig
  mm/hotplug: Allow memmap on memory hotplug request to fallback
  mm/hotplug: Allow architecture to override memmap on memory support
check
  mm/hotplug: Allow pageblock alignment via altmap reservation
  powerpc/book3s64/memhotplug: Enable memmap on memory for radix
  mm/hotplug: Embed vmem_altmap details in memory block

Vishal Verma (1):
  dax/kmem: Always enroll hotplugged memory for memmap_on_memory

 arch/arm64/Kconfig|   4 +-
 arch/powerpc/Kconfig  |   1 +
 arch/powerpc/include/asm/pgtable.h|  28 +
 .../platforms/pseries/hotplug-memory.c|   3 +-
 arch/x86/Kconfig  |   4 +-
 drivers/acpi/acpi_memhotplug.c|   3 +-
 drivers/base/memory.c |  35 --
 drivers/dax/kmem.c|  81 ++
 include/linux/memory.h|   8 +-
 include/linux/memory_hotplug.h|   1 -
 mm/Kconfig|  12 ++
 mm/memory_hotplug.c   | 103 --
 12 files changed, 205 insertions(+), 78 deletions(-)

-- 
2.41.0



Re: [PATCH v5 21/38] powerpc: Implement the new page table range API

2023-07-10 Thread Christophe Leroy


Le 10/07/2023 à 22:43, Matthew Wilcox (Oracle) a écrit :
> Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().
> Change the PG_arch_1 (aka PG_dcache_dirty) flag from being per-page to
> per-folio.
> 
> Signed-off-by: Matthew Wilcox (Oracle) 
> Acked-by: Mike Rapoport (IBM) 
> Cc: Michael Ellerman 
> Cc: Nicholas Piggin 
> Cc: Christophe Leroy 
> Cc: linuxppc-dev@lists.ozlabs.org

Reviewed-by: Christophe Leroy 

> ---
>   arch/powerpc/include/asm/book3s/32/pgtable.h |  5 --
>   arch/powerpc/include/asm/book3s/64/pgtable.h |  6 +--
>   arch/powerpc/include/asm/book3s/pgtable.h| 11 ++---
>   arch/powerpc/include/asm/cacheflush.h| 14 --
>   arch/powerpc/include/asm/kvm_ppc.h   | 10 ++--
>   arch/powerpc/include/asm/nohash/pgtable.h| 16 ++
>   arch/powerpc/include/asm/pgtable.h   | 12 +
>   arch/powerpc/mm/book3s64/hash_utils.c| 11 +++--
>   arch/powerpc/mm/cacheflush.c | 40 +--
>   arch/powerpc/mm/nohash/e500_hugetlbpage.c|  3 +-
>   arch/powerpc/mm/pgtable.c| 51 +++-
>   11 files changed, 86 insertions(+), 93 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
> b/arch/powerpc/include/asm/book3s/32/pgtable.h
> index 7bf1fe7297c6..5f12b9382909 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
> @@ -462,11 +462,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t 
> pgprot)
>pgprot_val(pgprot));
>   }
>   
> -static inline unsigned long pte_pfn(pte_t pte)
> -{
> - return pte_val(pte) >> PTE_RPN_SHIFT;
> -}
> -
>   /* Generic modifiers for PTE bits */
>   static inline pte_t pte_wrprotect(pte_t pte)
>   {
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
> b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 4acc9690f599..c5baa3082a5a 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -104,6 +104,7 @@
>* and every thing below PAGE_SHIFT;
>*/
>   #define PTE_RPN_MASK(((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
> +#define PTE_RPN_SHIFTPAGE_SHIFT
>   /*
>* set of bits not changed in pmd_modify. Even though we have hash specific 
> bits
>* in here, on radix we expect them to be zero.
> @@ -569,11 +570,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t 
> pgprot)
>   return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | 
> _PAGE_PTE);
>   }
>   
> -static inline unsigned long pte_pfn(pte_t pte)
> -{
> - return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
> -}
> -
>   /* Generic modifiers for PTE bits */
>   static inline pte_t pte_wrprotect(pte_t pte)
>   {
> diff --git a/arch/powerpc/include/asm/book3s/pgtable.h 
> b/arch/powerpc/include/asm/book3s/pgtable.h
> index d18b748ea3ae..3b7bd36a2321 100644
> --- a/arch/powerpc/include/asm/book3s/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/pgtable.h
> @@ -9,13 +9,6 @@
>   #endif
>   
>   #ifndef __ASSEMBLY__
> -/* Insert a PTE, top-level function is out of line. It uses an inline
> - * low level function in the respective pgtable-* files
> - */
> -extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
> -pte_t pte);
> -
> -
>   #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
>   extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long 
> address,
>pte_t *ptep, pte_t entry, int dirty);
> @@ -36,7 +29,9 @@ void __update_mmu_cache(struct vm_area_struct *vma, 
> unsigned long address, pte_t
>* corresponding HPTE into the hash table ahead of time, instead of
>* waiting for the inevitable extra hash-table miss exception.
>*/
> -static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned 
> long address, pte_t *ptep)
> +static inline void update_mmu_cache_range(struct vm_fault *vmf,
> + struct vm_area_struct *vma, unsigned long address,
> + pte_t *ptep, unsigned int nr)
>   {
>   if (IS_ENABLED(CONFIG_PPC32) && !mmu_has_feature(MMU_FTR_HPTE_TABLE))
>   return;
> diff --git a/arch/powerpc/include/asm/cacheflush.h 
> b/arch/powerpc/include/asm/cacheflush.h
> index 7564dd4fd12b..ef7d2de33b89 100644
> --- a/arch/powerpc/include/asm/cacheflush.h
> +++ b/arch/powerpc/include/asm/cacheflush.h
> @@ -35,13 +35,19 @@ static inline void flush_cache_vmap(unsigned long start, 
> unsigned long end)
>* It just marks the page as not i-cache clean.  We do the i-cache
>* flush later when the page is given to a user process, if necessary.
>*/
> -static inline void flush_dcache_page(struct page *page)
> +static inline void flush_dcache_folio(struct folio *folio)
>   {
>   if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
>   return;
>   /* avoid an atomic op if possible */
> - if (test_bit(PG_dcache_clean, &page->flags))
> -  

Re: [PATCH v4 20/36] powerpc: Implement the new page table range API

2023-07-10 Thread Christophe Leroy


Le 10/07/2023 à 22:24, Matthew Wilcox a écrit :
> On Sat, Mar 18, 2023 at 09:19:04AM +, Christophe Leroy wrote:
>> void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
>>  pte_t pte, unsigned int nr)
>> {
>>  pgprot_t prot;
>>  unsigned long pfn;
>>  /*
>>   * Make sure hardware valid bit is not set. We don't do
>>   * tlb flush for this update.
>>   */
>>  VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
>>
>>  /* Note: mm->context.id might not yet have been assigned as
>>   * this context might not have been activated yet when this
>>   * is called.
>>   */
>>  pte = set_pte_filter(pte);
>>
>>  prot = pte_pgprot(pte);
>>  pfn = pte_pfn(pte);
>>  /* Perform the setting of the PTE */
>>  for (;;) {
>>  __set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot), 0);
>>  if (--nr == 0)
>>  break;
>>  ptep++;
>>  pfn++;
>>  addr += PAGE_SIZE;
>>  }
>> }
> 
> I'd rather the per-arch code were as similar to each other and the
> generic implementation as possible.  Fewer bugs that way and easier
> for other people to make changes that have to touch every architecture
> in the future.

I understand your point but I dislike the idea of open coding pte 
manipulations when you have helpers for that. If you had used helpers 
from the beginning you wouldn't have had the mishap you had in v4.


Re: [PATCH v4 04/13] mm/vmemmap: Allow architectures to override how vmemmap optimization works

2023-07-10 Thread Aneesh Kumar K.V
Christophe Leroy  writes:

> Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit :
>> Architectures like powerpc will like to use different page table allocators
>> and mapping mechanisms to implement vmemmap optimization. Similar to
>> vmemmap_populate allow architectures to implement
>> vmemmap_populate_compound_pages
>> 
>> Signed-off-by: Aneesh Kumar K.V 
>> ---
>>   mm/sparse-vmemmap.c | 3 +++
>>   1 file changed, 3 insertions(+)
>> 
>> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
>> index a044a130405b..541b3f69a481 100644
>> --- a/mm/sparse-vmemmap.c
>> +++ b/mm/sparse-vmemmap.c
>> @@ -141,6 +141,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
>>  start, end - 1);
>>   }
>>   
>> +#ifndef vmemmap_populate_compound_pages
>>   pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int 
>> node,
>> struct vmem_altmap *altmap,
>> struct page *reuse)
>
> Should vmemmap_pte_populate() be static ?
>
> It looks odd to exclude a non-static function based on a non related macro.
>
> There are several such function in the block being excluded here. Can 
> you explain why it is correct to do that ?
>


Those functions can actually be made static, but I will do that as
part of a different patch. I will update this patch and make sure the
#ifdef only overrides vmemmap_populate_compound_pages.

modified   mm/sparse-vmemmap.c
@@ -141,7 +141,6 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
start, end - 1);
 }

-#ifndef vmemmap_populate_compound_pages
 pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int 
node,
   struct vmem_altmap *altmap,
   struct page *reuse)
@@ -359,6 +358,7 @@ int __meminit vmemmap_populate_hugepages(unsigned long 
start, unsigned long end,
return 0;
 }

+#ifndef vmemmap_populate_compound_pages
 /*
  * For compound pages bigger than section size (e.g. x86 1G compound
  * pages with 2M subsection size) fill the rest of sections as tail

-aneesh



Re: [PATCH v4 03/13] mm/vmemmap: Improve vmemmap_can_optimize and allow architectures to override

2023-07-10 Thread Aneesh Kumar K.V
Christophe Leroy  writes:

> Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit :
>> dax vmemmap optimization requires a minimum of 2 PAGE_SIZE area within
>> vmemmap such that tail page mapping can point to the second PAGE_SIZE area.
>> Enforce that in vmemmap_can_optimize() function.
>> 
>> Architectures like powerpc also want to enable vmemmap optimization
>> conditionally (only with radix MMU translation). Hence allow architecture
>> override.
>> 
>> Signed-off-by: Aneesh Kumar K.V 
>
> Reviewed-by: Christophe Leroy 
>
> Why renaming vmemmap_can_optimize() to __vmemmap_can_optimize() and keep 
> it when vmemmap_can_optimize() has been override ? Is that because you 
> expect overriding version of vmemmap_can_optimize() to call 
> __vmemmap_can_optimize() ?
>

Yes, __vmemmap_can_optimize will be used in patch 11
https://lore.kernel.org/linuxppc-dev/20230710160842.56300-12-aneesh.ku...@linux.ibm.com

-aneesh



Re: [PATCH v4 05/13] mm: Add __HAVE_ARCH_PUD_SAME similar to __HAVE_ARCH_P4D_SAME

2023-07-10 Thread Aneesh Kumar K.V
Christophe Leroy  writes:

> Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit :
>> This helps architectures to override pmd_same and pud_same independently.
>> 
>> Signed-off-by: Aneesh Kumar K.V 
>
> Reviewed-by: Christophe Leroy 
>
> Shouldn't you do it the modern way and use #ifndef pud_same instead of a 
> new __HAVE_ARCH_PUD_SAME like in the old days ?
>

Sure, will update. I was following the existing pmd_same override, but I
also agree that #ifndef pud_same is better.



>
>> ---
>>   include/linux/pgtable.h | 2 ++
>>   1 file changed, 2 insertions(+)
>> 
>> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
>> index 6fd9b2831338..91def34f7784 100644
>> --- a/include/linux/pgtable.h
>> +++ b/include/linux/pgtable.h
>> @@ -693,7 +693,9 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
>>   {
>>  return pmd_val(pmd_a) == pmd_val(pmd_b);
>>   }
>> +#endif
>>   
>> +#ifndef __HAVE_ARCH_PUD_SAME
>>   static inline int pud_same(pud_t pud_a, pud_t pud_b)
>>   {
>>  return pud_val(pud_a) == pud_val(pud_b);

-aneesh


Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues

2023-07-10 Thread Pingfan Liu
Hi Ming,

I do not have [PATCH 1/2] blk-mq: add blk_mq_max_nr_hw_queues() in my inbox,
so I am replying here.

At first glance, I think that  the cpu hot plug callback hook should
be the remedy for the newly introduced blk_mq_max_nr_hw_queues(),
although it is more complicated.

Consider the scenario where nr_cpus=4 is used to speed up the dumping
process: blk_mq_max_nr_hw_queues() cannot utilize the other three
cpus.


Thanks,

Pingfan

On Mon, Jul 10, 2023 at 5:16 PM Ming Lei  wrote:
>
> On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote:
> > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote:
> > > Take blk-mq's knowledge into account for calculating io queues.
> > >
> > > Fix wrong queue mapping in case of kdump kernel.
> > >
> > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see
> > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus()
> > > still returns all CPUs.
> >
> > That's simply broken.  Please fix the arch code to make sure
> > it does not return a bogus num_possible_cpus value for these
>
> That is documented in Documentation/admin-guide/kdump/kdump.rst.
>
> On arm and ppc64, 'maxcpus=1' is passed for kdump kernel, and "maxcpu=1"
> simply keep one of CPU cores as online, and others as offline.
>
> So Cc our arch(arm & ppc64) & kdump guys wrt. passing 'maxcpus=1' for
> kdump kernel.
>
> > setups, otherwise you'll have to paper over it in all kind of
> > drivers.
>
> The issue is only triggered for drivers which use managed irq &
> multiple hw queues.
>
>
> Thanks,
> Ming
>
>
> ___
> kexec mailing list
> ke...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
>



Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues

2023-07-10 Thread Pingfan Liu
On Mon, Jul 10, 2023 at 5:16 PM Ming Lei  wrote:
>
> On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote:
> > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote:
> > > Take blk-mq's knowledge into account for calculating io queues.
> > >
> > > Fix wrong queue mapping in case of kdump kernel.
> > >
> > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see
> > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus()
> > > still returns all CPUs.
> >
> > That's simply broken.  Please fix the arch code to make sure
> > it does not return a bogus num_possible_cpus value for these
>

In fact, num_possible_cpus is not broken.

Quote from admin-guide/kernel-parameters.txt
   maxcpus=[SMP] Maximum number of processors that an SMP kernel
   will bring up during bootup.  maxcpus=n : n >= 0 limits
   the kernel to bring up 'n' processors. Surely after
   bootup you can bring up the other plugged cpu by executing
   "echo 1 > /sys/devices/system/cpu/cpuX/online". So maxcpus
   only takes effect during system bootup.
   While n=0 is a special case, it is equivalent to "nosmp",
   which also disables the IO APIC.

Here, as it explains, maxcpus only affects bootup; later, extra
cpus can be brought online.

> That is documented in Documentation/admin-guide/kdump/kdump.rst.
>
> On arm and ppc64, 'maxcpus=1' is passed for kdump kernel, and "maxcpu=1"

On aarch64 and x86, nr_cpus=1 is used, while on ppc64, due to the
implementation, nr_cpus=1 can not be supported.


Thanks,

Pingfan

> simply keep one of CPU cores as online, and others as offline.
>
> So Cc our arch(arm & ppc64) & kdump guys wrt. passing 'maxcpus=1' for
> kdump kernel.
>
> > setups, otherwise you'll have to paper over it in all kind of
> > drivers.
>
> The issue is only triggered for drivers which use managed irq &
> multiple hw queues.
>
>
> Thanks,
> Ming
>
>
> ___
> kexec mailing list
> ke...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
>



Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues

2023-07-10 Thread Ming Lei
Hi Baoquan,

On Tue, Jul 11, 2023 at 11:35:50AM +0800, Baoquan He wrote:
> On 07/10/23 at 05:14pm, Ming Lei wrote:
> > On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote:
> > > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote:
> > > > Take blk-mq's knowledge into account for calculating io queues.
> > > > 
> > > > Fix wrong queue mapping in case of kdump kernel.
> > > > 
> > > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see
> > > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus()
> > > > still returns all CPUs.
> > > 
> > > That's simply broken.  Please fix the arch code to make sure
> > > it does not return a bogus num_possible_cpus value for these
> > 
> > That is documented in Documentation/admin-guide/kdump/kdump.rst.
> > 
> > On arm and ppc64, 'maxcpus=1' is passed for kdump kernel, and "maxcpu=1"
> > simply keep one of CPU cores as online, and others as offline.
> 
> I don't know maxcpus on arm and ppc64 well. But maxcpus=1 or nr_cpus=1
> are suggested parameter. Because usually nr_cpus=1 is enough to make
> kdump kernel work well to capture vmcore. However, user is allowed to
> specify nr_cpus=n (n>1) if they think multiple cpus are needed in kdump
> kernel. Your hard coding of cpu number in kdump kernel may be not so
> reasonable.

As I mentioned, for arm/ppc64, passing 'maxcpus=1' actually follows
Documentation/admin-guide/kdump/kdump.rst.

'nr_cpus=N' just works fine, so not related with this topic.

After 'maxcpus=1' is passed, the kernel only brings one of the cpu cores
online during booting, and the others can still be brought online by
userspace. This causes IO timeouts on some storage devices which
use managed irq and support multiple io queues.

Here the focus is whether passing 'maxcpus=1' is valid for the kdump
kernel; that is what we want to hear from our arch/kdump guys.

If yes, something needs to be fixed, such as, what this patchset is
doing.

> 
> Please cc kexec mailing list when posting so that people can view the
> whole thread of discussion.

I have already Cc'ed the kexec & arm/powerpc & irq lists.


Thanks,
Ming



Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues

2023-07-10 Thread Baoquan He
On 07/10/23 at 05:14pm, Ming Lei wrote:
> On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote:
> > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote:
> > > Take blk-mq's knowledge into account for calculating io queues.
> > > 
> > > Fix wrong queue mapping in case of kdump kernel.
> > > 
> > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see
> > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus()
> > > still returns all CPUs.
> > 
> > That's simply broken.  Please fix the arch code to make sure
> > it does not return a bogus num_possible_cpus value for these
> 
> That is documented in Documentation/admin-guide/kdump/kdump.rst.
> 
> On arm and ppc64, 'maxcpus=1' is passed for kdump kernel, and "maxcpu=1"
> simply keep one of CPU cores as online, and others as offline.

I don't know maxcpus on arm and ppc64 well. But maxcpus=1 or nr_cpus=1
are the suggested parameters, because usually nr_cpus=1 is enough to make
the kdump kernel work well to capture vmcore. However, the user is allowed
to specify nr_cpus=n (n>1) if they think multiple cpus are needed in the
kdump kernel. Your hard coding of the cpu number in the kdump kernel may
not be so reasonable.

Please cc kexec mailing list when posting so that people can view the
whole thread of discussion.

Thanks
Baoquan

> 
> So Cc our arch(arm & ppc64) & kdump guys wrt. passing 'maxcpus=1' for
> kdump kernel.
> 
> > setups, otherwise you'll have to paper over it in all kind of
> > drivers.
> 
> The issue is only triggered for drivers which use managed irq &
> multiple hw queues.
> 
> 
> Thanks,
> Ming
> 
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 



Re: [PATCH v7 5/8] KVM: x86/mmu: Don't pass FOLL_GET to __kvm_follow_pfn

2023-07-10 Thread David Stevens
On Tue, Jul 11, 2023 at 1:34 AM Isaku Yamahata  wrote:
>
> On Fri, Jul 07, 2023 at 10:35:02AM +0900,
> David Stevens  wrote:
>
> > > > > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > > > > index e44ab512c3a1..b1607e314497 100644
> > > > > > --- a/arch/x86/kvm/mmu/mmu.c
> > > > > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > > >
> > > > > ...
> > > > >
> > > > > > @@ -2937,6 +2943,7 @@ static int mmu_set_spte(struct kvm_vcpu 
> > > > > > *vcpu, struct kvm_memory_slot *slot,
> > > > > >   bool host_writable = !fault || fault->map_writable;
> > > > > >   bool prefetch = !fault || fault->prefetch;
> > > > > >   bool write_fault = fault && fault->write;
> > > > > > + bool is_refcounted = !fault || fault->is_refcounted_page;
> > > > >
> > > > > Just wonder, what if a non-refcounted page is prefetched?  Or is it 
> > > > > possible in
> > > > > practice?
> > > >
> > > > Prefetching is still done via gfn_to_page_many_atomic, which sets
> > > > FOLL_GET. That's fixable, but it's not something this series currently
> > > > does.
> > >
> > > So if we prefetch a page, REFCOUNTED bit is cleared unconditionally with 
> > > this
> > > hunk.  kvm_set_page_{dirty, accessed} won't be called as expected for 
> > > prefetched
> > > spte.  If I read the patch correctly, REFCOUNTED bit in SPTE should 
> > > represent
> > > whether the corresponding page is ref-countable or not, right?
> > >
> > > Because direct_pte_prefetch_many() is for legacy KVM MMU and 
> > > FNAME(prefetch_pte)
> > > is shadow paging, we need to test it with legacy KVM MMU or shadow paging 
> > > to hit
> > > the issue, though.
> > >
> >
> > direct_pte_prefetch_many and prefetch_gpte both pass NULL for the
> > fault parameter, so is_refcounted will evaluate to true. So the spte's
> > refcounted bit will get set in that case.
>
> Oops, my bad.  My point is "unconditionally".  Is the bit always set for
> non-refcountable pages?  Or are non-refcountable pages not prefetched?

The bit is never set for non-refcounted pages, and is always set for
refcounted pages. The current series never prefetches non-refcounted
pages, since it continues to use the gfn_to_page_many_atomic API.

-David


[PATCH net-next v3 01/10] net: wan: Remove unnecessary (void*) conversions

2023-07-10 Thread Wu Yunchuan
From: wuych 

Pointer variables of void * type do not require type cast.

Signed-off-by: Wu Yunchuan 
---
 drivers/net/wan/fsl_ucc_hdlc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 47c2ad7a3e42..91e37c3dcbee 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -350,7 +350,7 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
 static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev)
 {
hdlc_device *hdlc = dev_to_hdlc(dev);
-   struct ucc_hdlc_private *priv = (struct ucc_hdlc_private *)hdlc->priv;
+   struct ucc_hdlc_private *priv = hdlc->priv;
struct qe_bd *bd;
u16 bd_status;
unsigned long flags;
-- 
2.30.2



[powerpc:merge] BUILD SUCCESS 20125a0e8655abec375153cc23cc708ffa8c4380

2023-07-10 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
merge
branch HEAD: 20125a0e8655abec375153cc23cc708ffa8c4380  Automatic merge of 
'master' into merge (2023-07-10 09:49)

elapsed time: 1249m

configs tested: 169
configs skipped: 9

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alphaallyesconfig   gcc  
alpha   defconfig   gcc  
alpharandconfig-r032-20230710   gcc  
arc  alldefconfig   gcc  
arc  allyesconfig   gcc  
arc defconfig   gcc  
arc  randconfig-r002-20230710   gcc  
arc  randconfig-r014-20230710   gcc  
arc  randconfig-r025-20230710   gcc  
arc  randconfig-r031-20230710   gcc  
arc  randconfig-r043-20230710   gcc  
arm  allmodconfig   gcc  
arm  allyesconfig   gcc  
arm axm55xx_defconfig   gcc  
arm  collie_defconfig   clang
arm defconfig   gcc  
arm lpc18xx_defconfig   gcc  
arm  randconfig-r012-20230710   gcc  
arm  randconfig-r046-20230710   gcc  
arm socfpga_defconfig   clang
arm  sp7021_defconfig   clang
armspear3xx_defconfig   clang
arm   spitz_defconfig   clang
arm64alldefconfig   gcc  
arm64allyesconfig   gcc  
arm64   defconfig   gcc  
cskydefconfig   gcc  
csky randconfig-r011-20230710   gcc  
csky randconfig-r013-20230710   gcc  
hexagon  randconfig-r016-20230710   clang
hexagon  randconfig-r036-20230710   clang
hexagon  randconfig-r041-20230710   clang
hexagon  randconfig-r045-20230710   clang
i386 allyesconfig   gcc  
i386 buildonly-randconfig-r004-20230710   gcc  
i386 buildonly-randconfig-r005-20230710   gcc  
i386 buildonly-randconfig-r006-20230710   gcc  
i386  debian-10.3   gcc  
i386defconfig   gcc  
i386 randconfig-i001-20230710   gcc  
i386 randconfig-i002-20230710   gcc  
i386 randconfig-i003-20230710   gcc  
i386 randconfig-i004-20230710   gcc  
i386 randconfig-i005-20230710   gcc  
i386 randconfig-i006-20230710   gcc  
i386 randconfig-i011-20230710   clang
i386 randconfig-i012-20230710   clang
i386 randconfig-i013-20230710   clang
i386 randconfig-i014-20230710   clang
i386 randconfig-i015-20230710   clang
i386 randconfig-i016-20230710   clang
i386 randconfig-r004-20230710   gcc  
i386 randconfig-r032-20230710   gcc  
i386 randconfig-r033-20230710   gcc  
loongarchallmodconfig   gcc  
loongarch allnoconfig   gcc  
loongarch   defconfig   gcc  
loongarchrandconfig-r002-20230710   gcc  
loongarchrandconfig-r035-20230710   gcc  
m68k allmodconfig   gcc  
m68k allyesconfig   gcc  
m68k  amiga_defconfig   gcc  
m68kdefconfig   gcc  
m68km5407c3_defconfig   gcc  
m68k randconfig-r016-20230710   gcc  
m68kstmark2_defconfig   gcc  
microblaze  mmu_defconfig   gcc  
microblaze   randconfig-r015-20230710   gcc  
mips allmodconfig   gcc  
mips allyesconfig   gcc  
mips  ath25_defconfig   clang
mipsbcm47xx_defconfig   gcc  
mips   bmips_be_defconfig   gcc  
mips  bmips_stb_defconfig   clang
mips  cavium_octeon_defconfig   clang
mips cobalt_defconfig   gcc  
mips   ip27_defconfig   clang
mips   jazz_defconfig   gcc  
mips  loongson3_defconfig   gcc  
mips randconfig-r024-20230710   gcc  
mips randconfig-r034-20230710   clang
nios2   defconfig   gcc  
nios2randconfig-r011-20230710   gcc  
nios2randconfig-r014-20230710   gcc  
openrisc randconfig-r015-20230710   gcc  
openrisc

[powerpc:fixes-test] BUILD SUCCESS cf53564b11cef5cdfafc548b172345c9aa753f89

2023-07-10 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
fixes-test
branch HEAD: cf53564b11cef5cdfafc548b172345c9aa753f89  
powerpc/mm/book3s64/hash/4k: Add pmd_same callback for 4K page size

elapsed time: 1249m

configs tested: 51
configs skipped: 179

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alphaallyesconfig   gcc  
alpha   defconfig   gcc  
arc  allyesconfig   gcc  
arm  allmodconfig   gcc  
arm  allyesconfig   gcc  
arm  collie_defconfig   clang
arm defconfig   gcc  
arm64allyesconfig   gcc  
arm64   defconfig   gcc  
cskydefconfig   gcc  
hexagon  randconfig-r041-20230710   clang
hexagon  randconfig-r045-20230710   clang
i386 allyesconfig   gcc  
i386  debian-10.3   gcc  
i386defconfig   gcc  
loongarchallmodconfig   gcc  
loongarch allnoconfig   gcc  
loongarch   defconfig   gcc  
m68k allmodconfig   gcc  
m68k allyesconfig   gcc  
m68kdefconfig   gcc  
mips allmodconfig   gcc  
mips allyesconfig   gcc  
mips  ath25_defconfig   clang
powerpc  allmodconfig   gcc  
powerpc   allnoconfig   gcc  
powerpccell_defconfig   gcc  
powerpcge_imp3a_defconfig   clang
powerpc   maple_defconfig   gcc  
powerpc mpc832x_rdb_defconfig   clang
powerpc mpc83xx_defconfig   gcc  
powerpc  ppc40x_defconfig   gcc  
powerpc  randconfig-r004-20230710   gcc  
powerpc  randconfig-r006-20230710   gcc  
powerpc  randconfig-r026-20230710   clang
powerpc sequoia_defconfig   gcc  
powerpc skiroot_defconfig   clang
powerpc xes_mpc85xx_defconfig   clang
riscvallmodconfig   gcc  
riscv allnoconfig   gcc  
riscvallyesconfig   gcc  
riscv   defconfig   gcc  
riscvrandconfig-r042-20230710   clang
riscv  rv32_defconfig   gcc  
s390 alldefconfig   clang
s390 randconfig-r044-20230710   clang
sh kfr2r09-romimage_defconfig   gcc  
sparcallyesconfig   gcc  
sparc   defconfig   gcc  
x86_64  kexec   gcc  
xtensa  nommu_kc705_defconfig   gcc  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues

2023-07-10 Thread Ming Lei
On Mon, Jul 10, 2023 at 10:51:43AM -0600, Keith Busch wrote:
> On Mon, Jul 10, 2023 at 05:14:15PM +0800, Ming Lei wrote:
> > On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote:
> > > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote:
> > > > Take blk-mq's knowledge into account for calculating io queues.
> > > > 
> > > > Fix wrong queue mapping in case of kdump kernel.
> > > > 
> > > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see
> > > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus()
> > > > still returns all CPUs.
> > > 
> > > That's simply broken.  Please fix the arch code to make sure
> > > it does not return a bogus num_possible_cpus value for these
> > 
> > That is documented in Documentation/admin-guide/kdump/kdump.rst.
> > 
> > On arm and ppc64, 'maxcpus=1' is passed for kdump kernel, and "maxcpu=1"
> > simply keep one of CPU cores as online, and others as offline.
> > 
> > So Cc our arch(arm & ppc64) & kdump guys wrt. passing 'maxcpus=1' for
> > kdump kernel.
> > 
> > > setups, otherwise you'll have to paper over it in all kind of
> > > drivers.
> > 
> > The issue is only triggered for drivers which use managed irq &
> > multiple hw queues.
> 
> Is the problem that the managed interrupt sets the effective irq
> affinity to an offline CPU? You mentioned observed timeouts; are you

Yes, the problem is that blk-mq only creates hctx0, so nvme-pci
translates it into hctx0's nvme_queue. This is actually wrong, because
blk-mq's view of the queue topology isn't the same as nvme's view.

> seeing the "completion polled" nvme message?

Yes, "completion polled" can be observed. Meantime the warning in
__irq_startup_managed() can be triggered from
nvme_timeout()->nvme_poll_irqdisable()->enable_irq().


Thanks,
Ming



Re: [PATCH 0/2] PCI/AER: Remove/unexport error reporting enable/disable

2023-07-10 Thread Sathyanarayanan Kuppuswamy



On 7/10/23 4:21 PM, Bjorn Helgaas wrote:
> From: Bjorn Helgaas 
> 
> pci_disable_pcie_error_reporting() is unused; remove it.
> pci_enable_pcie_error_reporting() is used only inside aer.c; make it
> static.

Looks fine to me.

Reviewed-by: Kuppuswamy Sathyanarayanan 


> 
> Bjorn Helgaas (2):
>   PCI/AER: Drop unused pci_disable_pcie_error_reporting()
>   PCI/AER: Unexport pci_enable_pcie_error_reporting()
> 
>  drivers/pci/pcie/aer.c | 15 +--
>  include/linux/aer.h| 11 ---
>  2 files changed, 1 insertion(+), 25 deletions(-)
> 

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer


Re: [PATCH net-next v2 01/10] net: wan: Remove unnecessary (void*) conversions

2023-07-10 Thread yunchuan

On 2023/7/11 00:34, Andrew Lunn wrote:

On Mon, Jul 10, 2023 at 02:39:33PM +0800, Su Hui wrote:

From: wuych 

Pointer variables of void * type do not require type cast.

Signed-off-by: wuych 
---
  drivers/net/wan/fsl_ucc_hdlc.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 47c2ad7a3e42..73c73d8f4bb2 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -350,11 +350,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
  static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev)
  {
hdlc_device *hdlc = dev_to_hdlc(dev);
-   struct ucc_hdlc_private *priv = (struct ucc_hdlc_private *)hdlc->priv;
-   struct qe_bd *bd;
-   u16 bd_status;
+   struct ucc_hdlc_private *priv = hdlc->priv;
unsigned long flags;
__be16 *proto_head;
+   struct qe_bd *bd;
+   u16 bd_status;

When dealing with existing broken reverse Christmas tree, please don't
make it worse with a change. But actually fixing it should be in a
different patch.

We want patches to be obviously correct. By removing the cast and
moving variables around, it is less obvious it is correct, than having
two patches.

Got it, thanks.
I will send v3 later, which drops the reverse Christmas tree
change.

So sorry for this!

Wu Yunchuan



Andrew


[PATCH 2/2] PCI/AER: Unexport pci_enable_pcie_error_reporting()

2023-07-10 Thread Bjorn Helgaas
From: Bjorn Helgaas 

pci_enable_pcie_error_reporting() is used only inside aer.c.  Stop exposing
it outside the file.

Signed-off-by: Bjorn Helgaas 
---
 drivers/pci/pcie/aer.c | 3 +--
 include/linux/aer.h| 6 --
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index d4c948b7c449..645149608054 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -230,7 +230,7 @@ int pcie_aer_is_native(struct pci_dev *dev)
return pcie_ports_native || host->native_aer;
 }
 
-int pci_enable_pcie_error_reporting(struct pci_dev *dev)
+static int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
int rc;
 
@@ -240,7 +240,6 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
rc = pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
return pcibios_err_to_errno(rc);
 }
-EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
 
 int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
 {
diff --git a/include/linux/aer.h b/include/linux/aer.h
index aadc9242cb20..2dd175f5debd 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -41,14 +41,8 @@ struct aer_capability_regs {
 };
 
 #if defined(CONFIG_PCIEAER)
-/* PCIe port driver needs this function to enable AER */
-int pci_enable_pcie_error_reporting(struct pci_dev *dev);
 int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
 #else
-static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
-{
-   return -EINVAL;
-}
 static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
 {
return -EINVAL;
-- 
2.34.1



[PATCH 1/2] PCI/AER: Drop unused pci_disable_pcie_error_reporting()

2023-07-10 Thread Bjorn Helgaas
From: Bjorn Helgaas 

pci_disable_pcie_error_reporting() has no callers.  Remove it.

Signed-off-by: Bjorn Helgaas 
---
 drivers/pci/pcie/aer.c | 12 
 include/linux/aer.h|  5 -
 2 files changed, 17 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index f6c24ded134c..d4c948b7c449 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -242,18 +242,6 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
 
-int pci_disable_pcie_error_reporting(struct pci_dev *dev)
-{
-   int rc;
-
-   if (!pcie_aer_is_native(dev))
-   return -EIO;
-
-   rc = pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
-   return pcibios_err_to_errno(rc);
-}
-EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
-
 int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
 {
int aer = dev->aer_cap;
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 3a3ab05e13fd..aadc9242cb20 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -43,17 +43,12 @@ struct aer_capability_regs {
 #if defined(CONFIG_PCIEAER)
 /* PCIe port driver needs this function to enable AER */
 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
-int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
 #else
 static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
return -EINVAL;
 }
-static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev)
-{
-   return -EINVAL;
-}
 static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
 {
return -EINVAL;
-- 
2.34.1



[PATCH 0/2] PCI/AER: Remove/unexport error reporting enable/disable

2023-07-10 Thread Bjorn Helgaas
From: Bjorn Helgaas 

pci_disable_pcie_error_reporting() is unused; remove it.
pci_enable_pcie_error_reporting() is used only inside aer.c; make it
static.

Bjorn Helgaas (2):
  PCI/AER: Drop unused pci_disable_pcie_error_reporting()
  PCI/AER: Unexport pci_enable_pcie_error_reporting()

 drivers/pci/pcie/aer.c | 15 +--
 include/linux/aer.h| 11 ---
 2 files changed, 1 insertion(+), 25 deletions(-)

-- 
2.34.1



[PATCH v2] syscalls: Cleanup references to sys_lookup_dcookie()

2023-07-10 Thread Sohil Mehta
commit 'be65de6b03aa ("fs: Remove dcookies support")' removed the
syscall definition for lookup_dcookie.  However, syscall tables still
point to the old sys_lookup_dcookie() definition. Update syscall tables
of all architectures to directly point to sys_ni_syscall() instead.

Signed-off-by: Sohil Mehta 
Reviewed-by: Randy Dunlap 
Acked-by: Namhyung Kim  # for perf
---
v2:
- Rebased to v6.5-rc1. No other dependencies.
- Added acquired tags.
---
 arch/alpha/kernel/syscalls/syscall.tbl  | 2 +-
 arch/arm/tools/syscall.tbl  | 2 +-
 arch/arm64/include/asm/unistd32.h   | 4 ++--
 arch/ia64/kernel/syscalls/syscall.tbl   | 2 +-
 arch/m68k/kernel/syscalls/syscall.tbl   | 2 +-
 arch/microblaze/kernel/syscalls/syscall.tbl | 2 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 2 +-
 arch/mips/kernel/syscalls/syscall_n64.tbl   | 2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 2 +-
 arch/parisc/kernel/syscalls/syscall.tbl | 2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl| 2 +-
 arch/s390/kernel/syscalls/syscall.tbl   | 2 +-
 arch/sh/kernel/syscalls/syscall.tbl | 2 +-
 arch/sparc/kernel/syscalls/syscall.tbl  | 2 +-
 arch/x86/entry/syscalls/syscall_32.tbl  | 2 +-
 arch/x86/entry/syscalls/syscall_64.tbl  | 2 +-
 arch/xtensa/kernel/syscalls/syscall.tbl | 2 +-
 include/linux/compat.h  | 1 -
 include/linux/syscalls.h| 1 -
 include/uapi/asm-generic/unistd.h   | 2 +-
 kernel/sys_ni.c | 2 --
 tools/include/uapi/asm-generic/unistd.h | 2 +-
 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 2 +-
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl  | 2 +-
 tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 +-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl   | 2 +-
 26 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
b/arch/alpha/kernel/syscalls/syscall.tbl
index 1f13995d00d7..1349012f5c2e 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -334,7 +334,7 @@
 401common  io_submit   sys_io_submit
 402common  io_cancel   sys_io_cancel
 405common  exit_group  sys_exit_group
-406common  lookup_dcookie  sys_lookup_dcookie
+406common  lookup_dcookie  sys_ni_syscall
 407common  epoll_createsys_epoll_create
 408common  epoll_ctl   sys_epoll_ctl
 409common  epoll_wait  sys_epoll_wait
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 8ebed8a13874..cb7ea3bf18cf 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -263,7 +263,7 @@
 246common  io_submit   sys_io_submit
 247common  io_cancel   sys_io_cancel
 248common  exit_group  sys_exit_group
-249common  lookup_dcookie  sys_lookup_dcookie
+249common  lookup_dcookie  sys_ni_syscall
 250common  epoll_createsys_epoll_create
 251common  epoll_ctl   sys_epoll_ctl   
sys_oabi_epoll_ctl
 252common  epoll_wait  sys_epoll_wait
diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index d952a28463e0..2d8ab890818a 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -508,8 +508,8 @@ __SYSCALL(__NR_io_submit, compat_sys_io_submit)
 __SYSCALL(__NR_io_cancel, sys_io_cancel)
 #define __NR_exit_group 248
 __SYSCALL(__NR_exit_group, sys_exit_group)
-#define __NR_lookup_dcookie 249
-__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie)
+   /* 249 was lookup_dcookie */
+__SYSCALL(249, sys_ni_syscall)
 #define __NR_epoll_create 250
 __SYSCALL(__NR_epoll_create, sys_epoll_create)
 #define __NR_epoll_ctl 251
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl 
b/arch/ia64/kernel/syscalls/syscall.tbl
index f8c74ffeeefb..ac8bd817b1b9 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -222,7 +222,7 @@
 210common  fadvise64   sys_fadvise64_64
 211common  tgkill  sys_tgkill
 212common  exit_group  sys_exit_group
-213common  lookup_dcookie  sys_lookup_dcookie
+213common  lookup_dcookie  sys_ni_syscall
 214common  io_setupsys_io_setup
 215common  io_destroy  sys_io_destroy
 216common  io_geteventssys_io_getevents
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl 
b/arch/m68k/kernel/syscalls/syscall.tbl
index 

Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime

2023-07-10 Thread Jeff Layton
On Mon, 2023-07-10 at 14:35 +0200, Christian Brauner wrote:
> On Fri, Jul 07, 2023 at 08:42:31AM -0400, Jeff Layton wrote:
> > On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote:
> > > v2:
> > > - prepend patches to add missing ctime updates
> > > - add simple_rename_timestamp helper function
> > > - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_*
> > > - drop individual inode_ctime_set_{sec,nsec} helpers
> > > 
> > 
> > After review by Jan and others, and Jan's ext4 rework, the diff on top
> > of the series I posted a couple of days ago is below. I don't really
> > want to spam everyone with another ~100 patch v3 series, but I can if
> > you think that's best.
> > 
> > Christian, what would you like me to do here?
> 
> I picked up the series from the list and folded the fixups you posted
> here into the respective fs conversion patches. I hope that helps you
> avoid a resend. You should have received a separate "thank you" mail for
> all of this.
> 
> To each patch that I folded one of the fixlets from below into I added a
> git note that records a link to your mail here and the respective patch
> hunk from this mail that I folded into the patch. git.kernel.org will
> show notes by default. For example,
> https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git/commit/?h=vfs.ctime=8b0e3c2e99004609a16ba145bcbdfdddb78e220e
> should show you the note I added. You can also fetch them via
> git fetch $remote refs/notes/*:refs/notes/*
> (You probably know that ofc but jic.) if you're interested.
> 
> Based on v6.5-rc1 as of today.
> 

Many thanks!!! I'll get to work rebasing the multigrain timestamp series
on top of that.

> Btw, both b4 and patchwork somehow treat the series in weird ways.
> IOW, based on the message id of the cover letter I was able to pull most
> messages except for:
> 
> [07/92] fs: add ctime accessors infrastructure
> [08/92] fs: new helper: simple_rename_timestamp
> [92/92] fs: rename i_ctime field to __i_ctime
> 
> which I pulled in separately. Not sure what the cause of this is.

Good to know.

I ended up doing the send in two phases: one for the cover letter and
infrastructure patches that went to everyone, and one for the per-
subsystem patches that went to individual maintainers and lists.

I suspect that screwed up the message IDs somehow. Hopefully I won't
need to do a posting like that again soon, but I'll pay closer attention
to the message id handling next time.

Thanks again!
-- 
Jeff Layton 


Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime

2023-07-10 Thread Christian Brauner
On Fri, Jul 07, 2023 at 08:42:31AM -0400, Jeff Layton wrote:
> On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote:
> > v2:
> > - prepend patches to add missing ctime updates
> > - add simple_rename_timestamp helper function
> > - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_*
> > - drop individual inode_ctime_set_{sec,nsec} helpers
> > 
> 
> After review by Jan and others, and Jan's ext4 rework, the diff on top
> of the series I posted a couple of days ago is below. I don't really
> want to spam everyone with another ~100 patch v3 series, but I can if
> you think that's best.
> 
> Christian, what would you like me to do here?

I picked up the series from the list and folded the fixups you posted
here into the respective fs conversion patches. I hope that helps you
avoid a resend. You should have received a separate "thank you" mail for
all of this.

To each patch that I folded one of the fixlets from below into I added a
git note that records a link to your mail here and the respective patch
hunk from this mail that I folded into the patch. git.kernel.org will
show notes by default. For example,
https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git/commit/?h=vfs.ctime=8b0e3c2e99004609a16ba145bcbdfdddb78e220e
should show you the note I added. You can also fetch them via
git fetch $remote refs/notes/*:refs/notes/*
(You probably know that ofc but jic.) if you're interested.

Based on v6.5-rc1 as of today.

Btw, both b4 and patchwork somehow treat the series in weird ways.
IOW, based on the message id of the cover letter I was able to pull most
messages except for:

[07/92] fs: add ctime accessors infrastructure
[08/92] fs: new helper: simple_rename_timestamp
[92/92] fs: rename i_ctime field to __i_ctime

which I pulled in separately. Not sure what the cause of this is.


Re: [PATCH v2 00/92] fs: new accessors for inode->i_ctime

2023-07-10 Thread Christian Brauner
On Wed, 05 Jul 2023 14:58:09 -0400, Jeff Layton wrote:
> v2:
> - prepend patches to add missing ctime updates
> - add simple_rename_timestamp helper function
> - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_*
> - drop individual inode_ctime_set_{sec,nsec} helpers
> 
> I've been working on a patchset to change how the inode->i_ctime is
> accessed in order to give us conditional, high-res timestamps for the
> ctime and mtime. struct timespec64 has unused bits in it that we can use
> to implement this. In order to do that however, we need to wrap all
> accesses of inode->i_ctime to ensure that bits used as flags are
> appropriately handled.
> 
> [...]

Applied to the vfs.ctime branch of the vfs/vfs.git tree.
Patches in the vfs.ctime branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs.ctime

[01/92] ibmvmc: update ctime in conjunction with mtime on write
https://git.kernel.org/vfs/vfs/c/ead310563ad2
[02/92] bfs: update ctime in addition to mtime when adding entries
https://git.kernel.org/vfs/vfs/c/f42faf14b838
[03/92] efivarfs: update ctime when mtime changes on a write
https://git.kernel.org/vfs/vfs/c/d8d026e0d1f2
[04/92] exfat: ensure that ctime is updated whenever the mtime is
https://git.kernel.org/vfs/vfs/c/d84bd8fa48d7
[05/92] apparmor: update ctime whenever the mtime changes on an inode
https://git.kernel.org/vfs/vfs/c/73955caedfae
[06/92] cifs: update the ctime on a partial page write
https://git.kernel.org/vfs/vfs/c/c2f784379c99
[07/92] fs: add ctime accessors infrastructure
https://git.kernel.org/vfs/vfs/c/64f0367de800
[08/92] fs: new helper: simple_rename_timestamp
https://git.kernel.org/vfs/vfs/c/54ced54a0239
[09/92] btrfs: convert to simple_rename_timestamp
https://git.kernel.org/vfs/vfs/c/218e0f662fee
[10/92] ubifs: convert to simple_rename_timestamp
https://git.kernel.org/vfs/vfs/c/caac4f65568d
[11/92] shmem: convert to simple_rename_timestamp
https://git.kernel.org/vfs/vfs/c/d3d11e9927b6
[12/92] exfat: convert to simple_rename_timestamp
https://git.kernel.org/vfs/vfs/c/71534b484c63
[13/92] ntfs3: convert to simple_rename_timestamp
https://git.kernel.org/vfs/vfs/c/140880821ce0
[14/92] reiserfs: convert to simple_rename_timestamp
https://git.kernel.org/vfs/vfs/c/1a1a4df5e8fc
[15/92] spufs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/784e5a93c9cf
[16/92] s390: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/1cece1f8e5c2
[17/92] binderfs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/0bcd830a76f3
[18/92] infiniband: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/811f97f80b01
[19/92] ibm: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/b447ed7597f0
[20/92] usb: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/2557dc7f2dde
[21/92] 9p: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/4cd4b11385ef
[22/92] adfs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/e257d7ade66e
[23/92] affs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/770619f19a77
[24/92] afs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/758506e44668
[25/92] fs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/a0a5a9810b37
[26/92] autofs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/d7d1363cc3f6
[27/92] befs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/d6218773de2d
[28/92] bfs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/368b313ac2ab
[29/92] btrfs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/d3d15221956a
[30/92] ceph: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/818fc6e0129a
[31/92] coda: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/4e0b22fbc012
[32/92] configfs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/69c977798a6a
[33/92] cramfs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/911f086eae23
[34/92] debugfs: convert to ctime accessor functions
https://git.kernel.org/vfs/vfs/c/634a50390dbb
[35/92] devpts: convert to ctime accessor 

Re: [PATCH v2 1/2] powerpc/tpm: Create linux,sml-base/size as big endian

2023-07-10 Thread Jarkko Sakkinen
On Thu, 2023-06-15 at 22:37 +1000, Michael Ellerman wrote:
> There's code in prom_instantiate_sml() to do a "SML handover" (Stored
> Measurement Log) from OF to Linux, before Linux shuts down Open
> Firmware.
> 
> This involves creating a buffer to hold the SML, and creating two device
> tree properties to record its base address and size. The kernel then
> later reads those properties from the device tree to find the SML.
> 
> When the code was initially added in commit 4a727429abec ("PPC64: Add
> support for instantiating SML from Open Firmware") the powerpc kernel
> was always built big endian, so the properties were created big endian
> by default.
> 
> However since then little endian support was added to powerpc, and now
> the code lacks conversions to big endian when creating the properties.
> 
> This means on little endian kernels the device tree properties are
> little endian, which is contrary to the device tree spec, and in
> contrast to all other device tree properties.
> 
> To cope with that a workaround was added in tpm_read_log_of() to skip
> the endian conversion if the properties were created via the SML
> handover.
> 
> A better solution is to encode the properties as big endian as they
> should be, and remove the workaround.
> 
> Typically changing the encoding of a property like this would present
> problems for kexec. However the SML is not propagated across kexec, so
> changing the encoding of the properties is a non-issue.
> 
> Fixes: e46e22f12b19 ("tpm: enhance read_log_of() to support Physical TPM 
> event log")
> Signed-off-by: Michael Ellerman 
> Reviewed-by: Stefan Berger 
> ---
>  arch/powerpc/kernel/prom_init.c |  8 ++--
>  drivers/char/tpm/eventlog/of.c  | 23 ---
>  2 files changed, 10 insertions(+), 21 deletions(-)

Split into two patches (producer and consumer).

BR, Jarkko

> 
> v2: Add Stefan's reviewed-by.
> 
> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
> index d464ba412084..72fe306b6820 100644
> --- a/arch/powerpc/kernel/prom_init.c
> +++ b/arch/powerpc/kernel/prom_init.c
> @@ -1900,6 +1900,7 @@ static void __init prom_instantiate_sml(void)
>   u32 entry = 0, size = 0, succ = 0;
>   u64 base;
>   __be32 val;
> + __be64 val64;
>  
>   prom_debug("prom_instantiate_sml: start...\n");
>  
> @@ -1956,10 +1957,13 @@ static void __init prom_instantiate_sml(void)
>  
>   reserve_mem(base, size);
>  
> + val64 = cpu_to_be64(base);
>   prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-base",
> -  &base, sizeof(base));
> +  &val64, sizeof(val64));
> +
> + val = cpu_to_be32(size);
>   prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-size",
> -  &size, sizeof(size));
> +  &val, sizeof(val));
>  
>   prom_debug("sml base = 0x%llx\n", base);
>   prom_debug("sml size = 0x%x\n", size);
> diff --git a/drivers/char/tpm/eventlog/of.c b/drivers/char/tpm/eventlog/of.c
> index 930fe43d5daf..0bc0cb6333c6 100644
> --- a/drivers/char/tpm/eventlog/of.c
> +++ b/drivers/char/tpm/eventlog/of.c
> @@ -51,8 +51,8 @@ static int tpm_read_log_memory_region(struct tpm_chip *chip)
>  int tpm_read_log_of(struct tpm_chip *chip)
>  {
>   struct device_node *np;
> - const u32 *sizep;
> - const u64 *basep;
> + const __be32 *sizep;
> + const __be64 *basep;
>   struct tpm_bios_log *log;
>   u32 size;
>   u64 base;
> @@ -73,23 +73,8 @@ int tpm_read_log_of(struct tpm_chip *chip)
>   if (sizep == NULL || basep == NULL)
>   return -EIO;
>  
> - /*
> -  * For both vtpm/tpm, firmware has log addr and log size in big
> -  * endian format. But in case of vtpm, there is a method called
> -  * sml-handover which is run during kernel init even before
> -  * device tree is setup. This sml-handover function takes care
> -  * of endianness and writes to sml-base and sml-size in little
> -  * endian format. For this reason, vtpm doesn't need conversion
> -  * but physical tpm needs the conversion.
> -  */
> - if (of_property_match_string(np, "compatible", "IBM,vtpm") < 0 &&
> - of_property_match_string(np, "compatible", "IBM,vtpm20") < 0) {
> - size = be32_to_cpup((__force __be32 *)sizep);
> - base = be64_to_cpup((__force __be64 *)basep);
> - } else {
> - size = *sizep;
> - base = *basep;
> - }
> + size = be32_to_cpup(sizep);
> + base = be64_to_cpup(basep);
>  
>   if (size == 0) {
>   dev_warn(&chip->dev, "%s: Event log area empty\n", __func__);



[PATCH v5 21/38] powerpc: Implement the new page table range API

2023-07-10 Thread Matthew Wilcox (Oracle)
Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().
Change the PG_arch_1 (aka PG_dcache_dirty) flag from being per-page to
per-folio.

Signed-off-by: Matthew Wilcox (Oracle) 
Acked-by: Mike Rapoport (IBM) 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: linuxppc-dev@lists.ozlabs.org
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  5 --
 arch/powerpc/include/asm/book3s/64/pgtable.h |  6 +--
 arch/powerpc/include/asm/book3s/pgtable.h| 11 ++---
 arch/powerpc/include/asm/cacheflush.h| 14 --
 arch/powerpc/include/asm/kvm_ppc.h   | 10 ++--
 arch/powerpc/include/asm/nohash/pgtable.h| 16 ++
 arch/powerpc/include/asm/pgtable.h   | 12 +
 arch/powerpc/mm/book3s64/hash_utils.c| 11 +++--
 arch/powerpc/mm/cacheflush.c | 40 +--
 arch/powerpc/mm/nohash/e500_hugetlbpage.c|  3 +-
 arch/powerpc/mm/pgtable.c| 51 +++-
 11 files changed, 86 insertions(+), 93 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 7bf1fe7297c6..5f12b9382909 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -462,11 +462,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t 
pgprot)
 pgprot_val(pgprot));
 }
 
-static inline unsigned long pte_pfn(pte_t pte)
-{
-   return pte_val(pte) >> PTE_RPN_SHIFT;
-}
-
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 4acc9690f599..c5baa3082a5a 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -104,6 +104,7 @@
  * and every thing below PAGE_SHIFT;
  */
 #define PTE_RPN_MASK   (((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
+#define PTE_RPN_SHIFT  PAGE_SHIFT
 /*
  * set of bits not changed in pmd_modify. Even though we have hash specific 
bits
  * in here, on radix we expect them to be zero.
@@ -569,11 +570,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t 
pgprot)
return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | 
_PAGE_PTE);
 }
 
-static inline unsigned long pte_pfn(pte_t pte)
-{
-   return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
-}
-
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h 
b/arch/powerpc/include/asm/book3s/pgtable.h
index d18b748ea3ae..3b7bd36a2321 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -9,13 +9,6 @@
 #endif
 
 #ifndef __ASSEMBLY__
-/* Insert a PTE, top-level function is out of line. It uses an inline
- * low level function in the respective pgtable-* files
- */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-  pte_t pte);
-
-
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long 
address,
 pte_t *ptep, pte_t entry, int dirty);
@@ -36,7 +29,9 @@ void __update_mmu_cache(struct vm_area_struct *vma, unsigned 
long address, pte_t
  * corresponding HPTE into the hash table ahead of time, instead of
  * waiting for the inevitable extra hash-table miss exception.
  */
-static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long 
address, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+   struct vm_area_struct *vma, unsigned long address,
+   pte_t *ptep, unsigned int nr)
 {
if (IS_ENABLED(CONFIG_PPC32) && !mmu_has_feature(MMU_FTR_HPTE_TABLE))
return;
diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index 7564dd4fd12b..ef7d2de33b89 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -35,13 +35,19 @@ static inline void flush_cache_vmap(unsigned long start, 
unsigned long end)
  * It just marks the page as not i-cache clean.  We do the i-cache
  * flush later when the page is given to a user process, if necessary.
  */
-static inline void flush_dcache_page(struct page *page)
+static inline void flush_dcache_folio(struct folio *folio)
 {
if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
return;
/* avoid an atomic op if possible */
-   if (test_bit(PG_dcache_clean, &page->flags))
-   clear_bit(PG_dcache_clean, &page->flags);
+   if (test_bit(PG_dcache_clean, &folio->flags))
+   clear_bit(PG_dcache_clean, &folio->flags);
+}
+#define flush_dcache_folio flush_dcache_folio
+
+static inline void flush_dcache_page(struct page *page)
+{
+   flush_dcache_folio(page_folio(page));
 }
 
 void flush_icache_range(unsigned long start, 

Re: [PATCH 00/17] fbdev: Remove FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT flags

2023-07-10 Thread Sam Ravnborg
Hi Thomas,

On Mon, Jul 10, 2023 at 02:50:04PM +0200, Thomas Zimmermann wrote:
> Remove the unused flags FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT from
> fbdev and drivers, as briefly discussed at [1]. Both flags were maybe
> useful when fbdev had special handling for driver modules. With
> commit 376b3ff54c9a ("fbdev: Nuke FBINFO_MODULE"), they are both 0
> and have no further effect.
> 
> Patches 1 to 7 remove FBINFO_DEFAULT from drivers. Patches 2 to 5
> split this by the way the fb_info struct is being allocated. All flags
> are cleared to zero during the allocation.
> 
> Patches 8 to 16 do the same for FBINFO_FLAG_DEFAULT. Patch 8 fixes
> an actual bug in how arch/sh uses the token for struct fb_videomode,
> which is unrelated.
> 
> Patch 17 removes both flag constants from 

We have a few more flags that are unused - should they be nuked too?
FBINFO_HWACCEL_FILLRECT
FBINFO_HWACCEL_ROTATE
FBINFO_HWACCEL_XPAN

Unused as in no references from fbdev/core/*

I would rather see one series nuke all unused FBINFO flags in one go.
Assuming my quick grep is right and the above can be dropped.

Sam


Re: [PATCH v4 20/36] powerpc: Implement the new page table range API

2023-07-10 Thread Matthew Wilcox
On Sat, Mar 18, 2023 at 09:19:04AM +, Christophe Leroy wrote:
> void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
>   pte_t pte, unsigned int nr)
> {
>   pgprot_t prot;
>   unsigned long pfn;
>   /*
>* Make sure hardware valid bit is not set. We don't do
>* tlb flush for this update.
>*/
>   VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
> 
>   /* Note: mm->context.id might not yet have been assigned as
>* this context might not have been activated yet when this
>* is called.
>*/
>   pte = set_pte_filter(pte);
> 
>   prot = pte_pgprot(pte);
>   pfn = pte_pfn(pte);
>   /* Perform the setting of the PTE */
>   for (;;) {
>   __set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot), 0);
>   if (--nr == 0)
>   break;
>   ptep++;
>   pfn++;
>   addr += PAGE_SIZE;
>   }
> }

I'd rather the per-arch code were as similar to each other and the
generic implementation as possible.  Fewer bugs that way and easier
for other people to make changes that have to touch every architecture
in the future.


Re: [PATCH v4 08/13] powerpc/mm/trace: Convert trace event to trace event class

2023-07-10 Thread Christophe Leroy


Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit :
> A follow-up patch will add a pud variant for this same event.
> Using event class makes that addition simpler.
> 
> No functional change in this patch.
> 
> Signed-off-by: Aneesh Kumar K.V 

Reviewed-by: Christophe Leroy 

> ---
>   arch/powerpc/mm/book3s64/hash_pgtable.c  |  2 +-
>   arch/powerpc/mm/book3s64/radix_pgtable.c |  2 +-
>   include/trace/events/thp.h   | 23 ---
>   3 files changed, 18 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c 
> b/arch/powerpc/mm/book3s64/hash_pgtable.c
> index 51f48984abca..988948d69bc1 100644
> --- a/arch/powerpc/mm/book3s64/hash_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
> @@ -214,7 +214,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct 
> *mm, unsigned long addr
>   
>   old = be64_to_cpu(old_be);
>   
> - trace_hugepage_update(addr, old, clr, set);
> + trace_hugepage_update_pmd(addr, old, clr, set);
>   if (old & H_PAGE_HASHPTE)
>   hpte_do_hugepage_flush(mm, addr, pmdp, old);
>   return old;
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
> b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index e7ea492ac510..02e185d2e4d6 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -962,7 +962,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct 
> *mm, unsigned long add
>   #endif
>   
>   old = radix__pte_update(mm, addr, pmdp_ptep(pmdp), clr, set, 1);
> - trace_hugepage_update(addr, old, clr, set);
> + trace_hugepage_update_pmd(addr, old, clr, set);
>   
>   return old;
>   }
> diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h
> index 202b3e3e67ff..a95c78b10561 100644
> --- a/include/trace/events/thp.h
> +++ b/include/trace/events/thp.h
> @@ -8,25 +8,29 @@
>   #include <linux/types.h>
>   #include <linux/tracepoint.h>
>   
> -TRACE_EVENT(hugepage_set_pmd,
> +DECLARE_EVENT_CLASS(hugepage_set,
>   
> - TP_PROTO(unsigned long addr, unsigned long pmd),
> - TP_ARGS(addr, pmd),
> + TP_PROTO(unsigned long addr, unsigned long pte),
> + TP_ARGS(addr, pte),
>   TP_STRUCT__entry(
>   __field(unsigned long, addr)
> - __field(unsigned long, pmd)
> + __field(unsigned long, pte)
>   ),
>   
>   TP_fast_assign(
>   __entry->addr = addr;
> - __entry->pmd = pmd;
> + __entry->pte = pte;
>   ),
>   
> - TP_printk("Set pmd with 0x%lx with 0x%lx", __entry->addr, 
> __entry->pmd)
> + TP_printk("Set page table entry with 0x%lx with 0x%lx", 
> __entry->addr, __entry->pte)
>   );
>   
> +DEFINE_EVENT(hugepage_set, hugepage_set_pmd,
> + TP_PROTO(unsigned long addr, unsigned long pmd),
> + TP_ARGS(addr, pmd)
> +);
>   
> -TRACE_EVENT(hugepage_update,
> +DECLARE_EVENT_CLASS(hugepage_update,
>   
>   TP_PROTO(unsigned long addr, unsigned long pte, unsigned long clr, 
> unsigned long set),
>   TP_ARGS(addr, pte, clr, set),
> @@ -48,6 +52,11 @@ TRACE_EVENT(hugepage_update,
>   TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 
> 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set)
>   );
>   
> +DEFINE_EVENT(hugepage_update, hugepage_update_pmd,
> + TP_PROTO(unsigned long addr, unsigned long pmd, unsigned long clr, 
> unsigned long set),
> + TP_ARGS(addr, pmd, clr, set)
> +);
> +
>   DECLARE_EVENT_CLASS(migration_pmd,
>   
>   TP_PROTO(unsigned long addr, unsigned long pmd),


Re: [PATCH v4 06/13] mm/huge pud: Use transparent huge pud helpers only with CONFIG_TRANSPARENT_HUGEPAGE

2023-07-10 Thread Christophe Leroy


Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit :
> pudp_set_wrprotect and move_huge_pud helpers are only used when
> CONFIG_TRANSPARENT_HUGEPAGE is enabled. Similar to the pmdp_set_wrprotect
> and move_huge_pmd helpers, use the architecture override only if
> CONFIG_TRANSPARENT_HUGEPAGE is set.
> 
> Signed-off-by: Aneesh Kumar K.V 

Reviewed-by: Christophe Leroy 

> ---
>   include/linux/pgtable.h | 2 ++
>   mm/mremap.c | 2 +-
>   2 files changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index 91def34f7784..b5af3e014606 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -558,6 +558,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct 
> *mm,
>   #endif
>   #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
>   #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>   static inline void pudp_set_wrprotect(struct mm_struct *mm,
> unsigned long address, pud_t *pudp)
>   {
> @@ -571,6 +572,7 @@ static inline void pudp_set_wrprotect(struct mm_struct 
> *mm,
>   {
>   BUILD_BUG();
>   }
> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>   #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
>   #endif
>   
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 11e06e4ab33b..056478c106ee 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -349,7 +349,7 @@ static inline bool move_normal_pud(struct vm_area_struct 
> *vma,
>   }
>   #endif
>   
> -#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
> +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && 
> defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
>   static bool move_huge_pud(struct vm_area_struct *vma, unsigned long 
> old_addr,
> unsigned long new_addr, pud_t *old_pud, pud_t 
> *new_pud)
>   {


Re: [PATCH v4 05/13] mm: Add __HAVE_ARCH_PUD_SAME similar to __HAVE_ARCH_P4D_SAME

2023-07-10 Thread Christophe Leroy


Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit :
> This helps architectures to override pmd_same and pud_same independently.
> 
> Signed-off-by: Aneesh Kumar K.V 

Reviewed-by: Christophe Leroy 

Shouldn't you do it the modern way and use #ifndef pud_same instead of a 
new __HAVE_ARCH_PUD_SAME like in the old days ?

> ---
>   include/linux/pgtable.h | 2 ++
>   1 file changed, 2 insertions(+)
> 
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index 6fd9b2831338..91def34f7784 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -693,7 +693,9 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
>   {
>   return pmd_val(pmd_a) == pmd_val(pmd_b);
>   }
> +#endif
>   
> +#ifndef __HAVE_ARCH_PUD_SAME
>   static inline int pud_same(pud_t pud_a, pud_t pud_b)
>   {
>   return pud_val(pud_a) == pud_val(pud_b);


Re: [PATCH v4 04/13] mm/vmemmap: Allow architectures to override how vmemmap optimization works

2023-07-10 Thread Christophe Leroy


Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit :
> Architectures like powerpc would like to use different page table allocators
> and mapping mechanisms to implement vmemmap optimization. Similar to
> vmemmap_populate, allow architectures to implement
> vmemmap_populate_compound_pages.
> 
> Signed-off-by: Aneesh Kumar K.V 
> ---
>   mm/sparse-vmemmap.c | 3 +++
>   1 file changed, 3 insertions(+)
> 
> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> index a044a130405b..541b3f69a481 100644
> --- a/mm/sparse-vmemmap.c
> +++ b/mm/sparse-vmemmap.c
> @@ -141,6 +141,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
>   start, end - 1);
>   }
>   
> +#ifndef vmemmap_populate_compound_pages
>   pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int 
> node,
>  struct vmem_altmap *altmap,
>  struct page *reuse)

Should vmemmap_pte_populate() be static ?

It looks odd to exclude a non-static function based on a non related macro.

There are several such functions in the block being excluded here. Can 
you explain why it is correct to do that ?

> @@ -446,6 +447,8 @@ static int __meminit 
> vmemmap_populate_compound_pages(unsigned long start_pfn,
>   return 0;
>   }
>   
> +#endif
> +
>   struct page * __meminit __populate_section_memmap(unsigned long pfn,
>   unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
>   struct dev_pagemap *pgmap)


Re: [PATCH v2 07/12] s390: add pte_free_defer() for pgtables sharing page

2023-07-10 Thread Jason Gunthorpe
On Wed, Jul 05, 2023 at 02:55:16PM +0200, Gerald Schaefer wrote:

> Ah ok, I was aware of that "semi-RCU" fallback logic in tlb_remove_table(),
> but that is rather a generic issue, and not s390-specific. I thought you
> meant some s390-oddity here, of which we have a lot, unfortunately...
> Of course, we call tlb_remove_table() from our page_table_free_rcu(), so
> I guess you could say that page_table_free_rcu() cannot guarantee what
> tlb_remove_table() cannot guarantee.

The issue is the arches don't provide a reliable way to RCU free
things, so the core code creates an RCU situation using the MMU
batch. With the non-RCU compatible IPI fallback. So it isn't actually
RCU, it is IPI but optimized with RCU in some cases.

When Hugh introduces a reliable way to RCU free stuff we could fall
back to that in the TLB code instead of invoking the synchronize_rcu()

For lots of arches, S390 included after this series, this would be
pretty easy.

What I see now as the big trouble is that this series only addresses
PTE RCU'ness and making all the other levels RCUable would be much
harder on some arches like power.

In short we could create a CONFIG_ARCH_RCU_SAFE_PAGEWALK and it could
be done on a lot of arches quite simply, but at least not power. Which
makes me wonder about the value, but maybe it could shame power into
doing something..

However, calling things 'page_table_free_rcu()' when it doesn't
actually always do RCU but IPI optimized RCU is an unfortunate name :(
As long as you never assume it does RCU anywhere else, and don't use
rcu_read_lock(), it is fine :)

The corner case is narrow, you have to OOM the TLB batching before you
lose the RCU optimization of the IPI.  Then you can notice that
rcu_read_lock() doesn't actually protect against concurrent free.

Jason


Re: [PATCH v4 03/13] mm/vmemmap: Improve vmemmap_can_optimize and allow architectures to override

2023-07-10 Thread Christophe Leroy


Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit :
> dax vmemmap optimization requires a minimum of 2 PAGE_SIZE area within
> vmemmap such that tail page mapping can point to the second PAGE_SIZE area.
> Enforce that in vmemmap_can_optimize() function.
> 
> Architectures like powerpc also want to enable vmemmap optimization
> conditionally (only with radix MMU translation). Hence allow architecture
> override.
> 
> Signed-off-by: Aneesh Kumar K.V 

Reviewed-by: Christophe Leroy 

Why rename vmemmap_can_optimize() to __vmemmap_can_optimize() and keep
it when vmemmap_can_optimize() has been overridden? Is that because you
expect the overriding version of vmemmap_can_optimize() to call
__vmemmap_can_optimize()?

> ---
>   include/linux/mm.h | 27 +++
>   mm/mm_init.c   |  2 +-
>   2 files changed, 24 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 2dd73e4f3d8e..1a2234ee14d2 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3639,13 +3639,32 @@ void vmemmap_free(unsigned long start, unsigned long 
> end,
>   struct vmem_altmap *altmap);
>   #endif
>   
> +#define VMEMMAP_RESERVE_NR   2
>   #ifdef CONFIG_ARCH_WANT_OPTIMIZE_VMEMMAP
> -static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap,
> -struct dev_pagemap *pgmap)
> +static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap,
> +   struct dev_pagemap *pgmap)
>   {
> - return is_power_of_2(sizeof(struct page)) &&
> - pgmap && (pgmap_vmemmap_nr(pgmap) > 1) && !altmap;
> + unsigned long nr_pages;
> + unsigned long nr_vmemmap_pages;
> +
> + if (!pgmap || !is_power_of_2(sizeof(struct page)))
> + return false;
> +
> + nr_pages = pgmap_vmemmap_nr(pgmap);
> + nr_vmemmap_pages = ((nr_pages * sizeof(struct page)) >> PAGE_SHIFT);
> + /*
> +  * For vmemmap optimization with DAX we need minimum 2 vmemmap
> +  * pages. See layout diagram in Documentation/mm/vmemmap_dedup.rst
> +  */
> + return !altmap && (nr_vmemmap_pages > VMEMMAP_RESERVE_NR);
>   }
> +/*
> + * If we don't have an architecture override, use the generic rule
> + */
> +#ifndef vmemmap_can_optimize
> +#define vmemmap_can_optimize __vmemmap_can_optimize
> +#endif
> +
>   #else
>   static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap,
>  struct dev_pagemap *pgmap)
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index a1963c3322af..245ac69b66a5 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -1020,7 +1020,7 @@ static inline unsigned long compound_nr_pages(struct 
> vmem_altmap *altmap,
>   if (!vmemmap_can_optimize(altmap, pgmap))
>   return pgmap_vmemmap_nr(pgmap);
>   
> - return 2 * (PAGE_SIZE / sizeof(struct page));
> + return VMEMMAP_RESERVE_NR * (PAGE_SIZE / sizeof(struct page));
>   }
>   
>   static void __ref memmap_init_compound(struct page *head,


Re: [PATCH v4 02/13] mm: Change pudp_huge_get_and_clear_full take vm_area_struct as arg

2023-07-10 Thread Christophe Leroy


Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit :
> We will use this in a later patch to do tlb flush when clearing pud entries
> on powerpc. This is similar to commit 93a98695f2f9 ("mm: change
> pmdp_huge_get_and_clear_full take vm_area_struct as arg")
> 
> Signed-off-by: Aneesh Kumar K.V 

Reviewed-by: Christophe Leroy 

> ---
>   include/linux/pgtable.h | 4 ++--
>   mm/debug_vm_pgtable.c   | 2 +-
>   mm/huge_memory.c| 2 +-
>   3 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index cf13f8d938a8..6fd9b2831338 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -450,11 +450,11 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct 
> vm_area_struct *vma,
>   #endif
>   
>   #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
> -static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
> +static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
>   unsigned long address, pud_t *pudp,
>   int full)
>   {
> - return pudp_huge_get_and_clear(mm, address, pudp);
> + return pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
>   }
>   #endif
>   #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index ee119e33fef1..ee2c4c1dcfc8 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -385,7 +385,7 @@ static void __init pud_advanced_tests(struct 
> pgtable_debug_args *args)
>   WARN_ON(!(pud_write(pud) && pud_dirty(pud)));
>   
>   #ifndef __PAGETABLE_PMD_FOLDED
> - pudp_huge_get_and_clear_full(args->mm, vaddr, args->pudp, 1);
> + pudp_huge_get_and_clear_full(args->vma, vaddr, args->pudp, 1);
>   pud = READ_ONCE(*args->pudp);
>   WARN_ON(!pud_none(pud));
>   #endif /* __PAGETABLE_PMD_FOLDED */
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index eb3678360b97..ba20cef681a4 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -1981,7 +1981,7 @@ int zap_huge_pud(struct mmu_gather *tlb, struct 
> vm_area_struct *vma,
>   if (!ptl)
>   return 0;
>   
> - pudp_huge_get_and_clear_full(tlb->mm, addr, pud, tlb->fullmm);
> + pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm);
>   tlb_remove_pud_tlb_entry(tlb, pud, addr);
>   if (vma_is_special_huge(vma)) {
>   spin_unlock(ptl);


Re: [PATCH v4 01/13] mm/hugepage pud: Allow arch-specific helper function to check huge page pud support

2023-07-10 Thread Christophe Leroy


Le 10/07/2023 à 18:08, Aneesh Kumar K.V a écrit :
> Architectures like powerpc would like to enable transparent huge page pud
> support only with radix translation. To support that add
> has_transparent_pud_hugepage() helper that architectures can override.
> 
> Signed-off-by: Aneesh Kumar K.V 

Reviewed-by: Christophe Leroy 

> ---
>   drivers/nvdimm/pfn_devs.c | 2 +-
>   include/linux/pgtable.h   | 3 +++
>   2 files changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
> index af7d9301520c..18ad315581ca 100644
> --- a/drivers/nvdimm/pfn_devs.c
> +++ b/drivers/nvdimm/pfn_devs.c
> @@ -100,7 +100,7 @@ static unsigned long 
> *nd_pfn_supported_alignments(unsigned long *alignments)
>   
>   if (has_transparent_hugepage()) {
>   alignments[1] = HPAGE_PMD_SIZE;
> - if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
> + if (has_transparent_pud_hugepage())
>   alignments[2] = HPAGE_PUD_SIZE;
>   }
>   
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index 5063b482e34f..cf13f8d938a8 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -1499,6 +1499,9 @@ typedef unsigned int pgtbl_mod_mask;
>   #define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE)
>   #endif
>   
> +#ifndef has_transparent_pud_hugepage
> +#define has_transparent_pud_hugepage() 
> IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
> +#endif
>   /*
>* On some architectures it depends on the mm if the p4d/pud or pmd
>* layer of the page table hierarchy is folded or not.


[PATCH 2/2] powerpc/crypto: don't build aes-gcm-p10 by default

2023-07-10 Thread Omar Sandoval
From: Omar Sandoval 

None of the other accelerated crypto modules are built by default.

Signed-off-by: Omar Sandoval 
---
 arch/powerpc/crypto/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index 81ae015861c0..97802c72317c 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -101,7 +101,6 @@ config CRYPTO_AES_GCM_P10
select CRYPTO_ALGAPI
select CRYPTO_AEAD
select CRYPTO_SKCIPHER
-   default m
help
  AEAD cipher: AES cipher algorithms (FIPS-197)
  GCM (Galois/Counter Mode) authenticated encryption mode (NIST 
SP800-38D)
-- 
2.41.0



Re: [PATCH 2/2] nvme-pci: use blk_mq_max_nr_hw_queues() to calculate io queues

2023-07-10 Thread Keith Busch
On Mon, Jul 10, 2023 at 05:14:15PM +0800, Ming Lei wrote:
> On Mon, Jul 10, 2023 at 08:41:09AM +0200, Christoph Hellwig wrote:
> > On Sat, Jul 08, 2023 at 10:02:59AM +0800, Ming Lei wrote:
> > > Take blk-mq's knowledge into account for calculating io queues.
> > > 
> > > Fix wrong queue mapping in case of kdump kernel.
> > > 
> > > On arm and ppc64, 'maxcpus=1' is passed to kdump command line, see
> > > `Documentation/admin-guide/kdump/kdump.rst`, so num_possible_cpus()
> > > still returns all CPUs.
> > 
> > That's simply broken.  Please fix the arch code to make sure
> > it does not return a bogus num_possible_cpus value for these
> 
> That is documented in Documentation/admin-guide/kdump/kdump.rst.
> 
> On arm and ppc64, 'maxcpus=1' is passed for the kdump kernel, and "maxcpus=1"
> simply keeps one of the CPU cores online and the others offline.
> 
> So Cc our arch(arm & ppc64) & kdump guys wrt. passing 'maxcpus=1' for
> kdump kernel.
> 
> > setups, otherwise you'll have to paper over it in all kind of
> > drivers.
> 
> The issue is only triggered for drivers which use managed irq &
> multiple hw queues.

Is the problem that the managed interrupt sets the effective irq
affinity to an offline CPU? You mentioned observed timeouts; are you
seeing the "completion polled" nvme message?


RE: [PATCH] soc: fsl: qe: Replace all non-returning strlcpy with strscpy

2023-07-10 Thread Leo Li


> -Original Message-
> From: Azeem Shaikh 
> Sent: Sunday, July 9, 2023 9:36 PM
> To: Kees Cook 
> Cc: Qiang Zhao ; linux-harden...@vger.kernel.org;
> linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org; Leo Li
> ; linux-arm-ker...@lists.infradead.org
> Subject: Re: [PATCH] soc: fsl: qe: Replace all non-returning strlcpy with
> strscpy
> 
> On Tue, May 23, 2023 at 1:20 PM Kees Cook 
> wrote:
> >
> > On Tue, May 23, 2023 at 02:14:25AM +, Azeem Shaikh wrote:
> > > strlcpy() reads the entire source buffer first.
> > > This read may exceed the destination size limit.
> > > This is both inefficient and can lead to linear read overflows if a
> > > source string is not NUL-terminated [1].
> > > In an effort to remove strlcpy() completely [2], replace
> > > strlcpy() here with strscpy().
> > > No return values were used, so direct replacement is safe.
> > >
> > > [1]
> > >
> https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fww
> > >
> w.kernel.org%2Fdoc%2Fhtml%2Flatest%2Fprocess%2Fdeprecated.html%23s
> tr
> > >
> lcpy=05%7C01%7Cleoyang.li%40nxp.com%7C11f9df1df1b5440e4ec108
> db8
> > >
> 0ee64de%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C63824553360
> 3780
> > >
> 889%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2lu
> MzIiLCJB
> > >
> TiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C=jcTy3IF37wqC1
> MWsSuF
> > > %2F51Z1trQEMaow7BHkPSh3hzI%3D=0
> > > [2]
> > > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgi
> > >
> thub.com%2FKSPP%2Flinux%2Fissues%2F89=05%7C01%7Cleoyang.li%
> 40nx
> > >
> p.com%7C11f9df1df1b5440e4ec108db80ee64de%7C686ea1d3bc2b4c6fa92cd
> 99c5
> > >
> c301635%7C0%7C0%7C638245533603780889%7CUnknown%7CTWFpbGZsb3d
> 8eyJWIjo
> > >
> iMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C30
> 00%7
> > >
> C%7C%7C=Blr0W1oYPIw5uDu7HqlEkU7xOuAo4bQNkk%2Bt%2BAuFqc
> s%3D
> > > erved=0
> > >
> > > Signed-off-by: Azeem Shaikh 
> >
> > Reviewed-by: Kees Cook 
> >
> 
> Friendly ping on this.

Sorry for the late response.  But I found some old discussions whose
conclusion was not to convert old users.  Has this been changed later on?
https://lwn.net/Articles/659214/

Regards,
Leo


[PATCH 1/2] powerpc/crypto: fix missing skcipher dependency for aes-gcm-p10

2023-07-10 Thread Omar Sandoval
From: Omar Sandoval 

My stripped down configuration fails to build with:

  ERROR: modpost: "skcipher_walk_aead_encrypt" 
[arch/powerpc/crypto/aes-gcm-p10-crypto.ko] undefined!
  ERROR: modpost: "skcipher_walk_done" 
[arch/powerpc/crypto/aes-gcm-p10-crypto.ko] undefined!
  ERROR: modpost: "skcipher_walk_aead_decrypt" 
[arch/powerpc/crypto/aes-gcm-p10-crypto.ko] undefined!

Fix it by selecting CRYPTO_SKCIPHER.

Signed-off-by: Omar Sandoval 
---
 arch/powerpc/crypto/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index ad1872518992..81ae015861c0 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -100,6 +100,7 @@ config CRYPTO_AES_GCM_P10
select CRYPTO_LIB_AES
select CRYPTO_ALGAPI
select CRYPTO_AEAD
+   select CRYPTO_SKCIPHER
default m
help
  AEAD cipher: AES cipher algorithms (FIPS-197)
-- 
2.41.0



[PATCH v4 13/13] powerpc/book3s64/radix: Add debug message to give more details of vmemmap allocation

2023-07-10 Thread Aneesh Kumar K.V
Add some extra vmemmap pr_debug messages that indicate the type of
vmemmap allocation.

For ex: with DAX vmemmap optimization we can find the below details:
[  187.166580] radix-mmu: PAGE_SIZE vmemmap mapping
[  187.166587] radix-mmu: PAGE_SIZE vmemmap mapping
[  187.166591] radix-mmu: Tail page reuse vmemmap mapping
[  187.166594] radix-mmu: Tail page reuse vmemmap mapping
[  187.166598] radix-mmu: Tail page reuse vmemmap mapping
[  187.166601] radix-mmu: Tail page reuse vmemmap mapping
[  187.166604] radix-mmu: Tail page reuse vmemmap mapping
[  187.166608] radix-mmu: Tail page reuse vmemmap mapping
[  187.166611] radix-mmu: Tail page reuse vmemmap mapping
[  187.166614] radix-mmu: Tail page reuse vmemmap mapping
[  187.166617] radix-mmu: Tail page reuse vmemmap mapping
[  187.166620] radix-mmu: Tail page reuse vmemmap mapping
[  187.166623] radix-mmu: Tail page reuse vmemmap mapping
[  187.166626] radix-mmu: Tail page reuse vmemmap mapping
[  187.166629] radix-mmu: Tail page reuse vmemmap mapping
[  187.166632] radix-mmu: Tail page reuse vmemmap mapping

And without vmemmap optimization
[  293.549931] radix-mmu: PMD_SIZE vmemmap mapping
[  293.549984] radix-mmu: PMD_SIZE vmemmap mapping
[  293.550032] radix-mmu: PMD_SIZE vmemmap mapping
[  293.550076] radix-mmu: PMD_SIZE vmemmap mapping
[  293.550117] radix-mmu: PMD_SIZE vmemmap mapping

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/book3s64/radix_pgtable.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 73d0987369ff..2828e7e0802c 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1034,6 +1034,7 @@ static pte_t * __meminit 
radix__vmemmap_pte_populate(pmd_t *pmdp, unsigned long
p = vmemmap_alloc_block_buf(PAGE_SIZE, node, 
NULL);
if (!p)
return NULL;
+   pr_debug("PAGE_SIZE vmemmap mapping\n");
} else {
/*
 * When a PTE/PMD entry is freed from the init_mm
@@ -1046,6 +1047,7 @@ static pte_t * __meminit 
radix__vmemmap_pte_populate(pmd_t *pmdp, unsigned long
 */
get_page(reuse);
p = page_to_virt(reuse);
+   pr_debug("Tail page reuse vmemmap mapping\n");
}
 
VM_BUG_ON(!PAGE_ALIGNED(addr));
@@ -1155,6 +1157,7 @@ int __meminit radix__vmemmap_populate(unsigned long 
start, unsigned long end, in
p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
if (p) {
vmemmap_set_pmd(pmd, p, node, addr, next);
+   pr_debug("PMD_SIZE vmemmap mapping\n");
continue;
} else if (altmap) {
/*
-- 
2.41.0



Re: [PATCH v7 5/8] KVM: x86/mmu: Don't pass FOLL_GET to __kvm_follow_pfn

2023-07-10 Thread Isaku Yamahata
On Fri, Jul 07, 2023 at 10:35:02AM +0900,
David Stevens  wrote:

> > > > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > > > index e44ab512c3a1..b1607e314497 100644
> > > > > --- a/arch/x86/kvm/mmu/mmu.c
> > > > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > >
> > > > ...
> > > >
> > > > > @@ -2937,6 +2943,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, 
> > > > > struct kvm_memory_slot *slot,
> > > > >   bool host_writable = !fault || fault->map_writable;
> > > > >   bool prefetch = !fault || fault->prefetch;
> > > > >   bool write_fault = fault && fault->write;
> > > > > + bool is_refcounted = !fault || fault->is_refcounted_page;
> > > >
> > > > Just wonder, what if a non-refcounted page is prefetched?  Or is it 
> > > > possible in
> > > > practice?
> > >
> > > Prefetching is still done via gfn_to_page_many_atomic, which sets
> > > FOLL_GET. That's fixable, but it's not something this series currently
> > > does.
> >
> > So if we prefetch a page, REFCOUNTED bit is cleared unconditionally with 
> > this
> > hunk.  kvm_set_page_{dirty, accessed} won't be called as expected for 
> > prefetched
> > spte.  If I read the patch correctly, REFCOUNTED bit in SPTE should 
> > represent
> > whether the corresponding page is ref-countable or not, right?
> >
> > Because direct_pte_prefetch_many() is for legacy KVM MMU and 
> > FNAME(prefetch_pte)
> > is shadow paging, we need to test it with legacy KVM MMU or shadow paging 
> > to hit
> > the issue, though.
> >
> 
> direct_pte_prefetch_many and prefetch_gpte both pass NULL for the
> fault parameter, so is_refcounted will evaluate to true. So the spte's
> refcounted bit will get set in that case.

Oops, my bad.  My point is "unconditionally".  Is the bit always set for
non-refcountable pages?  Or are non-refcountable pages not prefetched?
-- 
Isaku Yamahata 


Re: [PATCH net-next v2 01/10] net: wan: Remove unnecessary (void*) conversions

2023-07-10 Thread Andrew Lunn
On Mon, Jul 10, 2023 at 02:39:33PM +0800, Su Hui wrote:
> From: wuych 
> 
> Pointer variables of void * type do not require type cast.
> 
> Signed-off-by: wuych 
> ---
>  drivers/net/wan/fsl_ucc_hdlc.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
> index 47c2ad7a3e42..73c73d8f4bb2 100644
> --- a/drivers/net/wan/fsl_ucc_hdlc.c
> +++ b/drivers/net/wan/fsl_ucc_hdlc.c
> @@ -350,11 +350,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
>  static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev)
>  {
>   hdlc_device *hdlc = dev_to_hdlc(dev);
> - struct ucc_hdlc_private *priv = (struct ucc_hdlc_private *)hdlc->priv;
> - struct qe_bd *bd;
> - u16 bd_status;
> + struct ucc_hdlc_private *priv = hdlc->priv;
>   unsigned long flags;
>   __be16 *proto_head;
> + struct qe_bd *bd;
> + u16 bd_status;

When dealing with existing broken reverse Christmas tree, please don't
make it worse with a change. But actually fixing it should be in a
different patch.

We want patches to be obviously correct. By removing the cast and
moving variables around, it is less obvious it is correct, than having
two patches.

   Andrew


Re: [PATCH 09/17] auxdisplay: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers

2023-07-10 Thread Miguel Ojeda
On Mon, Jul 10, 2023 at 5:22 PM Thomas Zimmermann  wrote:
>
> I'll append a patch to the series that documents this.
>
> Sure.

Thanks!

If you are planning to take it into some other tree:

Acked-by: Miguel Ojeda 

Otherwise, I can take it into the `auxdisplay` tree.

Cheers,
Miguel


[PATCH v4 12/13] powerpc/book3s64/radix: Remove mmu_vmemmap_psize

2023-07-10 Thread Aneesh Kumar K.V
This is not used by radix anymore.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/book3s64/radix_pgtable.c | 11 ---
 arch/powerpc/mm/init_64.c| 21 ++---
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
b/arch/powerpc/mm/book3s64/radix_pgtable.c
index b492b67c0b7d..73d0987369ff 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -601,17 +601,6 @@ void __init radix__early_init_mmu(void)
 #else
mmu_virtual_psize = MMU_PAGE_4K;
 #endif
-
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-   /* vmemmap mapping */
-   if (mmu_psize_defs[MMU_PAGE_2M].shift) {
-   /*
-* map vmemmap using 2M if available
-*/
-   mmu_vmemmap_psize = MMU_PAGE_2M;
-   } else
-   mmu_vmemmap_psize = mmu_virtual_psize;
-#endif
 #endif
/*
 * initialize page table size
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 5701faca39ef..6db7a063ba63 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -198,17 +198,12 @@ bool altmap_cross_boundary(struct vmem_altmap *altmap, 
unsigned long start,
return false;
 }
 
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int 
node,
-   struct vmem_altmap *altmap)
+int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int 
node,
+struct vmem_altmap *altmap)
 {
bool altmap_alloc;
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
 
-#ifdef CONFIG_PPC_BOOK3S_64
-   if (radix_enabled())
-   return radix__vmemmap_populate(start, end, node, altmap);
-#endif
-
/* Align to the page size of the linear mapping. */
start = ALIGN_DOWN(start, page_size);
 
@@ -277,6 +272,18 @@ int __meminit vmemmap_populate(unsigned long start, 
unsigned long end, int node,
return 0;
 }
 
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int 
node,
+  struct vmem_altmap *altmap)
+{
+
+#ifdef CONFIG_PPC_BOOK3S_64
+   if (radix_enabled())
+   return radix__vmemmap_populate(start, end, node, altmap);
+#endif
+
+   return __vmemmap_populate(start, end, node, altmap);
+}
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 static unsigned long vmemmap_list_free(unsigned long start)
 {
-- 
2.41.0



[PATCH v4 11/13] powerpc/book3s64/radix: Add support for vmemmap optimization for radix

2023-07-10 Thread Aneesh Kumar K.V
With 2M PMD-level mapping, we require 32 struct pages and a single vmemmap
page can contain 1024 struct pages (PAGE_SIZE/sizeof(struct page)). Hence
with 64K page size, we don't use vmemmap deduplication for PMD-level
mapping.

Signed-off-by: Aneesh Kumar K.V 
---
 Documentation/mm/vmemmap_dedup.rst |   1 +
 Documentation/powerpc/index.rst|   1 +
 Documentation/powerpc/vmemmap_dedup.rst| 101 ++
 arch/powerpc/Kconfig   |   1 +
 arch/powerpc/include/asm/book3s/64/radix.h |   9 +
 arch/powerpc/mm/book3s64/radix_pgtable.c   | 203 +
 6 files changed, 316 insertions(+)
 create mode 100644 Documentation/powerpc/vmemmap_dedup.rst

diff --git a/Documentation/mm/vmemmap_dedup.rst 
b/Documentation/mm/vmemmap_dedup.rst
index a4b12ff906c4..c573e08b5043 100644
--- a/Documentation/mm/vmemmap_dedup.rst
+++ b/Documentation/mm/vmemmap_dedup.rst
@@ -210,6 +210,7 @@ the device (altmap).
 
 The following page sizes are supported in DAX: PAGE_SIZE (4K on x86_64),
 PMD_SIZE (2M on x86_64) and PUD_SIZE (1G on x86_64).
+For powerpc equivalent details see Documentation/powerpc/vmemmap_dedup.rst
 
 The differences with HugeTLB are relatively minor.
 
diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst
index d33b554ca7ba..a50834798454 100644
--- a/Documentation/powerpc/index.rst
+++ b/Documentation/powerpc/index.rst
@@ -36,6 +36,7 @@ powerpc
 ultravisor
 vas-api
 vcpudispatch_stats
+vmemmap_dedup
 
 features
 
diff --git a/Documentation/powerpc/vmemmap_dedup.rst 
b/Documentation/powerpc/vmemmap_dedup.rst
new file mode 100644
index ..dc4db59fdf87
--- /dev/null
+++ b/Documentation/powerpc/vmemmap_dedup.rst
@@ -0,0 +1,101 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==
+Device DAX
+==
+
+The device-dax interface uses the tail deduplication technique explained in
+Documentation/mm/vmemmap_dedup.rst
+
+On powerpc, vmemmap deduplication is only used with radix MMU translation. Also
+with a 64K page size, only the devdax namespace with 1G alignment uses vmemmap
+deduplication.
+
+With 2M PMD level mapping, we require 32 struct pages and a single 64K vmemmap
+page can contain 1024 struct pages (64K/sizeof(struct page)). Hence there is no
+vmemmap deduplication possible.
+
+With 1G PUD level mapping, we require 16384 struct pages and a single 64K
+vmemmap page can contain 1024 struct pages (64K/sizeof(struct page)). Hence we
+require 16 64K pages in vmemmap to map the struct page for 1G PUD level 
mapping.
+
+Here's how things look on device-dax after the sections are populated::
+ +---+ ---virt_to_page---> +---+   mapping to   +---+
+ |   | | 0 | -> | 0 |
+ |   | +---++---+
+ |   | | 1 | -> | 1 |
+ |   | +---++---+
+ |   | | 2 | ^ ^ ^ ^ ^ ^
+ |   | +---+   | | | | |
+ |   | | 3 | --+ | | | |
+ |   | +---+ | | | |
+ |   | | 4 | + | | |
+ |PUD| +---+   | | |
+ |   level   | | . | --+ | |
+ |  mapping  | +---+ | |
+ |   | | . | + |
+ |   | +---+   |
+ |   | | 15| --+
+ |   | +---+
+ |   |
+ |   |
+ |   |
+ +---+
+
+
+With 4K page size, 2M PMD level mapping requires 512 struct pages and a single
+4K vmemmap page contains 64 struct pages(4K/sizeof(struct page)). Hence we
+require 8 4K pages in vmemmap to map the struct page for 2M pmd level mapping.
+
+Here's how things look on device-dax after the sections are populated::
+
+ +---+ ---virt_to_page---> +---+   mapping to   +---+
+ |   | | 0 | -> | 0 |
+ |   | +---++---+
+ |   | | 1 | -> | 1 |
+ |   | +---++---+
+ |   | | 2 | ^ ^ ^ ^ ^ ^
+ |   | +---+   | | | | |
+ |   | | 3 | --+ | | | |
+ |   | 

[PATCH v4 10/13] powerpc/book3s64/vmemmap: Switch radix to use a different vmemmap handling function

2023-07-10 Thread Aneesh Kumar K.V
This is in preparation to update radix to implement vmemmap optimization
for devdax. Below are the rules w.r.t radix vmemmap mapping

1. First try to map things using PMD (2M)
2. With altmap if altmap cross-boundary check returns true, fall back to
   PAGE_SIZE
3. If we can't allocate PMD_SIZE backing memory for vmemmap, fallback to
   PAGE_SIZE

On removing vmemmap mapping, check if every subsection that is using the
vmemmap area is invalid. If found to be invalid, that implies we can safely
free the vmemmap area. We don't use the PAGE_UNUSED pattern used by x86
because with 64K page size, we need to do the above check even at the
PAGE_SIZE granularity.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/radix.h |   2 +
 arch/powerpc/include/asm/pgtable.h |   4 +
 arch/powerpc/mm/book3s64/radix_pgtable.c   | 326 +++--
 arch/powerpc/mm/init_64.c  |  26 +-
 4 files changed, 327 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h 
b/arch/powerpc/include/asm/book3s/64/radix.h
index 2ef92f36340f..f1461289643a 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -331,6 +331,8 @@ extern int __meminit radix__vmemmap_create_mapping(unsigned 
long start,
 unsigned long phys);
 int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end,
  int node, struct vmem_altmap *altmap);
+void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
+  struct vmem_altmap *altmap);
 extern void radix__vmemmap_remove_mapping(unsigned long start,
unsigned long page_size);
 
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 6a88bfdaa69b..68817ea7f994 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -165,6 +165,10 @@ static inline bool is_ioremap_addr(const void *x)
 
return addr >= IOREMAP_BASE && addr < IOREMAP_END;
 }
+
+int __meminit vmemmap_populated(unsigned long vmemmap_addr, int 
vmemmap_map_size);
+bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+  unsigned long page_size);
 #endif /* CONFIG_PPC64 */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 227fea53c217..9a7f3707b6fb 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -744,8 +744,59 @@ static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
p4d_clear(p4d);
 }
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long 
end)
+{
+   unsigned long start = ALIGN_DOWN(addr, PMD_SIZE);
+
+   return !vmemmap_populated(start, PMD_SIZE);
+}
+
+static bool __meminit vmemmap_page_is_unused(unsigned long addr, unsigned long 
end)
+{
+   unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+
+   return !vmemmap_populated(start, PAGE_SIZE);
+
+}
+#endif
+
+static void __meminit free_vmemmap_pages(struct page *page,
+struct vmem_altmap *altmap,
+int order)
+{
+   unsigned int nr_pages = 1 << order;
+
+   if (altmap) {
+   unsigned long alt_start, alt_end;
+   unsigned long base_pfn = page_to_pfn(page);
+
+   /*
+* with 2M vmemmap mmaping we can have things setup
+* such that even though atlmap is specified we never
+* used altmap.
+*/
+   alt_start = altmap->base_pfn;
+   alt_end = altmap->base_pfn + altmap->reserve +
+   altmap->free + altmap->alloc + altmap->align;
+
+   if (base_pfn >= alt_start && base_pfn < alt_end) {
+   vmem_altmap_free(altmap, nr_pages);
+   return;
+   }
+   }
+
+   if (PageReserved(page)) {
+   /* allocated from memblock */
+   while (nr_pages--)
+   free_reserved_page(page++);
+   } else
+   free_pages((unsigned long)page_address(page), order);
+}
+
 static void remove_pte_table(pte_t *pte_start, unsigned long addr,
-unsigned long end, bool direct)
+unsigned long end, bool direct,
+struct vmem_altmap *altmap)
 {
unsigned long next, pages = 0;
pte_t *pte;
@@ -759,24 +810,26 @@ static void remove_pte_table(pte_t *pte_start, unsigned 
long addr,
if (!pte_present(*pte))
continue;
 
-   if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
-   /*
-   

[PATCH v4 09/13] powerpc/book3s64/mm: Enable transparent pud hugepage

2023-07-10 Thread Aneesh Kumar K.V
This is enabled only with radix translation and 1G hugepage size. This will
be used with devdax device memory with a namespace alignment of 1G.

Anon transparent hugepage is not supported even though we do have helpers
checking pud_trans_huge(). We should never find that return true. The only
expected pte bit combination is _PAGE_PTE | _PAGE_DEVMAP.

Some of the helpers are never expected to get called on hash translation
and hence are marked to call BUG() in such a case.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/hash.h |   9 +
 arch/powerpc/include/asm/book3s/64/pgtable.h  | 155 --
 arch/powerpc/include/asm/book3s/64/radix.h|  36 
 .../include/asm/book3s/64/tlbflush-radix.h|   2 +
 arch/powerpc/include/asm/book3s/64/tlbflush.h |   8 +
 arch/powerpc/mm/book3s64/pgtable.c|  78 +
 arch/powerpc/mm/book3s64/radix_pgtable.c  |  28 
 arch/powerpc/mm/book3s64/radix_tlb.c  |   7 +
 arch/powerpc/platforms/Kconfig.cputype|   1 +
 include/trace/events/thp.h|  10 ++
 10 files changed, 323 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index d4a19e6547ac..6e70ae511631 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -138,7 +138,16 @@ static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 }
 
 #definehash__pmd_bad(pmd)  (pmd_val(pmd) & H_PMD_BAD_BITS)
+
+/*
+ * pud comparison that will work with both pte and page table pointer.
+ */
+static inline int hash__pud_same(pud_t pud_a, pud_t pud_b)
+{
+   return (((pud_raw(pud_a) ^ pud_raw(pud_b)) & 
~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);
+}
 #definehash__pud_bad(pud)  (pud_val(pud) & H_PUD_BAD_BITS)
+
 static inline int hash__p4d_bad(p4d_t p4d)
 {
return (p4d_val(p4d) == 0);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 4acc9690f599..38ac50279199 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -921,8 +921,29 @@ static inline pud_t pte_pud(pte_t pte)
 {
return __pud_raw(pte_raw(pte));
 }
+
+static inline pte_t *pudp_ptep(pud_t *pud)
+{
+   return (pte_t *)pud;
+}
+
+#define pud_pfn(pud)   pte_pfn(pud_pte(pud))
+#define pud_dirty(pud) pte_dirty(pud_pte(pud))
+#define pud_young(pud) pte_young(pud_pte(pud))
+#define pud_mkold(pud) pte_pud(pte_mkold(pud_pte(pud)))
+#define pud_wrprotect(pud) pte_pud(pte_wrprotect(pud_pte(pud)))
+#define pud_mkdirty(pud)   pte_pud(pte_mkdirty(pud_pte(pud)))
+#define pud_mkclean(pud)   pte_pud(pte_mkclean(pud_pte(pud)))
+#define pud_mkyoung(pud)   pte_pud(pte_mkyoung(pud_pte(pud)))
+#define pud_mkwrite(pud)   pte_pud(pte_mkwrite(pud_pte(pud)))
 #define pud_write(pud) pte_write(pud_pte(pud))
 
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#define pud_soft_dirty(pmd)pte_soft_dirty(pud_pte(pud))
+#define pud_mksoft_dirty(pmd)  pte_pud(pte_mksoft_dirty(pud_pte(pud)))
+#define pud_clear_soft_dirty(pmd) pte_pud(pte_clear_soft_dirty(pud_pte(pud)))
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
 static inline int pud_bad(pud_t pud)
 {
if (radix_enabled())
@@ -1115,15 +1136,24 @@ static inline bool pmd_access_permitted(pmd_t pmd, bool 
write)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot);
 extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
 extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
 extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
   pmd_t *pmdp, pmd_t pmd);
+extern void set_pud_at(struct mm_struct *mm, unsigned long addr,
+  pud_t *pudp, pud_t pud);
+
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmd)
 {
 }
 
+static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
+   unsigned long addr, pud_t *pud)
+{
+}
+
 extern int hash__has_transparent_hugepage(void);
 static inline int has_transparent_hugepage(void)
 {
@@ -1133,6 +1163,14 @@ static inline int has_transparent_hugepage(void)
 }
 #define has_transparent_hugepage has_transparent_hugepage
 
+static inline int has_transparent_pud_hugepage(void)
+{
+   if (radix_enabled())
+   return radix__has_transparent_pud_hugepage();
+   return 0;
+}
+#define has_transparent_pud_hugepage has_transparent_pud_hugepage
+
 static inline unsigned long
 pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
unsigned long clr, unsigned long set)
@@ -1142,6 +1180,16 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long 
addr, 

[PATCH v4 08/13] powerpc/mm/trace: Convert trace event to trace event class

2023-07-10 Thread Aneesh Kumar K.V
A follow-up patch will add a pud variant for this same event.
Using event class makes that addition simpler.

No functional change in this patch.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/book3s64/hash_pgtable.c  |  2 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c |  2 +-
 include/trace/events/thp.h   | 23 ---
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c 
b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 51f48984abca..988948d69bc1 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -214,7 +214,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct 
*mm, unsigned long addr
 
old = be64_to_cpu(old_be);
 
-   trace_hugepage_update(addr, old, clr, set);
+   trace_hugepage_update_pmd(addr, old, clr, set);
if (old & H_PAGE_HASHPTE)
hpte_do_hugepage_flush(mm, addr, pmdp, old);
return old;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
b/arch/powerpc/mm/book3s64/radix_pgtable.c
index e7ea492ac510..02e185d2e4d6 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -962,7 +962,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct 
*mm, unsigned long add
 #endif
 
old = radix__pte_update(mm, addr, pmdp_ptep(pmdp), clr, set, 1);
-   trace_hugepage_update(addr, old, clr, set);
+   trace_hugepage_update_pmd(addr, old, clr, set);
 
return old;
 }
diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h
index 202b3e3e67ff..a95c78b10561 100644
--- a/include/trace/events/thp.h
+++ b/include/trace/events/thp.h
@@ -8,25 +8,29 @@
 #include 
 #include 
 
-TRACE_EVENT(hugepage_set_pmd,
+DECLARE_EVENT_CLASS(hugepage_set,
 
-   TP_PROTO(unsigned long addr, unsigned long pmd),
-   TP_ARGS(addr, pmd),
+   TP_PROTO(unsigned long addr, unsigned long pte),
+   TP_ARGS(addr, pte),
TP_STRUCT__entry(
__field(unsigned long, addr)
-   __field(unsigned long, pmd)
+   __field(unsigned long, pte)
),
 
TP_fast_assign(
__entry->addr = addr;
-   __entry->pmd = pmd;
+   __entry->pte = pte;
),
 
-   TP_printk("Set pmd with 0x%lx with 0x%lx", __entry->addr, 
__entry->pmd)
+   TP_printk("Set page table entry with 0x%lx with 0x%lx", 
__entry->addr, __entry->pte)
 );
 
+DEFINE_EVENT(hugepage_set, hugepage_set_pmd,
+   TP_PROTO(unsigned long addr, unsigned long pmd),
+   TP_ARGS(addr, pmd)
+);
 
-TRACE_EVENT(hugepage_update,
+DECLARE_EVENT_CLASS(hugepage_update,
 
TP_PROTO(unsigned long addr, unsigned long pte, unsigned long clr, 
unsigned long set),
TP_ARGS(addr, pte, clr, set),
@@ -48,6 +52,11 @@ TRACE_EVENT(hugepage_update,
TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 
0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set)
 );
 
+DEFINE_EVENT(hugepage_update, hugepage_update_pmd,
+   TP_PROTO(unsigned long addr, unsigned long pmd, unsigned long clr, 
unsigned long set),
+   TP_ARGS(addr, pmd, clr, set)
+);
+
 DECLARE_EVENT_CLASS(migration_pmd,
 
TP_PROTO(unsigned long addr, unsigned long pmd),
-- 
2.41.0



[PATCH v4 07/13] mm/vmemmap optimization: Split hugetlb and devdax vmemmap optimization

2023-07-10 Thread Aneesh Kumar K.V
Arm disabled hugetlb vmemmap optimization [1] because hugetlb vmemmap
optimization includes an update of both the permissions (writeable to
read-only) and the output address (pfn) of the vmemmap ptes. Some
architectures do not support such an update without first unmapping the
pte (marking it invalid).

With DAX vmemmap optimization we don't require such pte updates and
architectures can enable DAX vmemmap optimization while having hugetlb
vmemmap optimization disabled. Hence split DAX optimization support into a
different config.

s390, loongarch and riscv don't have devdax support. So the DAX config is not
enabled for them. With this change, arm64 should be able to select DAX
optimization

[1] commit 060a2c92d1b6 ("arm64: mm: hugetlb: Disable 
HUGETLB_PAGE_OPTIMIZE_VMEMMAP")

Signed-off-by: Aneesh Kumar K.V 
---
 arch/loongarch/Kconfig | 2 +-
 arch/riscv/Kconfig | 2 +-
 arch/s390/Kconfig  | 2 +-
 arch/x86/Kconfig   | 3 ++-
 fs/Kconfig | 2 +-
 include/linux/mm.h | 2 +-
 mm/Kconfig | 5 -
 7 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e55511af4c77..537ca2a4005a 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -59,7 +59,7 @@ config LOONGARCH
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_LD_ORPHAN_WARN
-   select ARCH_WANT_OPTIMIZE_VMEMMAP
+   select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select ARCH_WANTS_NO_INSTR
select BUILDTIME_TABLE_SORT
select COMMON_CLK
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 4c07b9189c86..6943d34c1ec1 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -53,7 +53,7 @@ config RISCV
select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select ARCH_WANT_LD_ORPHAN_WARN if !XIP_KERNEL
-   select ARCH_WANT_OPTIMIZE_VMEMMAP
+   select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
select BUILDTIME_TABLE_SORT if MMU
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 5b39918b7042..975fd06e4f4d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -127,7 +127,7 @@ config S390
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_IPC_PARSE_VERSION
-   select ARCH_WANT_OPTIMIZE_VMEMMAP
+   select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS2
select DMA_OPS if PCI
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7422db409770..78224aa76409 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -128,7 +128,8 @@ config X86
select ARCH_WANT_GENERAL_HUGETLB
select ARCH_WANT_HUGE_PMD_SHARE
select ARCH_WANT_LD_ORPHAN_WARN
-   select ARCH_WANT_OPTIMIZE_VMEMMAP   if X86_64
+   select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP   if X86_64
+   select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP   if X86_64
select ARCH_WANTS_THP_SWAP  if X86_64
select ARCH_HAS_PARANOID_L1D_FLUSH
select BUILDTIME_TABLE_SORT
diff --git a/fs/Kconfig b/fs/Kconfig
index 18d034ec7953..9c104c130a6e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -252,7 +252,7 @@ config HUGETLB_PAGE
 
 config HUGETLB_PAGE_OPTIMIZE_VMEMMAP
def_bool HUGETLB_PAGE
-   depends on ARCH_WANT_OPTIMIZE_VMEMMAP
+   depends on ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
depends on SPARSEMEM_VMEMMAP
 
 config HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1a2234ee14d2..83f51ec0897d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3640,7 +3640,7 @@ void vmemmap_free(unsigned long start, unsigned long end,
 #endif
 
 #define VMEMMAP_RESERVE_NR 2
-#ifdef CONFIG_ARCH_WANT_OPTIMIZE_VMEMMAP
+#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
 static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap,
  struct dev_pagemap *pgmap)
 {
diff --git a/mm/Kconfig b/mm/Kconfig
index 09130434e30d..923bd35f81f2 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -487,7 +487,10 @@ config SPARSEMEM_VMEMMAP
 # Select this config option from the architecture Kconfig, if it is preferred
 # to enable the feature of HugeTLB/dev_dax vmemmap optimization.
 #
-config ARCH_WANT_OPTIMIZE_VMEMMAP
+config ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
+   bool
+
+config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
bool
 
 config HAVE_MEMBLOCK_PHYS_MAP
-- 
2.41.0



[PATCH v4 06/13] mm/huge pud: Use transparent huge pud helpers only with CONFIG_TRANSPARENT_HUGEPAGE

2023-07-10 Thread Aneesh Kumar K.V
pudp_set_wrprotect and move_huge_pud helpers are only used when
CONFIG_TRANSPARENT_HUGEPAGE is enabled. Similar to the pmdp_set_wrprotect and
move_huge_pmd helpers, use the architecture override only if
CONFIG_TRANSPARENT_HUGEPAGE is set.

Signed-off-by: Aneesh Kumar K.V 
---
 include/linux/pgtable.h | 2 ++
 mm/mremap.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 91def34f7784..b5af3e014606 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -558,6 +558,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 #endif
 #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline void pudp_set_wrprotect(struct mm_struct *mm,
  unsigned long address, pud_t *pudp)
 {
@@ -571,6 +572,7 @@ static inline void pudp_set_wrprotect(struct mm_struct *mm,
 {
BUILD_BUG();
 }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 #endif
 
diff --git a/mm/mremap.c b/mm/mremap.c
index 11e06e4ab33b..056478c106ee 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -349,7 +349,7 @@ static inline bool move_normal_pud(struct vm_area_struct 
*vma,
 }
 #endif
 
-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && 
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
 static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
  unsigned long new_addr, pud_t *old_pud, pud_t 
*new_pud)
 {
-- 
2.41.0



[PATCH v4 05/13] mm: Add __HAVE_ARCH_PUD_SAME similar to __HAVE_ARCH_P4D_SAME

2023-07-10 Thread Aneesh Kumar K.V
This helps architectures to override pmd_same and pud_same independently.

Signed-off-by: Aneesh Kumar K.V 
---
 include/linux/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 6fd9b2831338..91def34f7784 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -693,7 +693,9 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 {
return pmd_val(pmd_a) == pmd_val(pmd_b);
 }
+#endif
 
+#ifndef __HAVE_ARCH_PUD_SAME
 static inline int pud_same(pud_t pud_a, pud_t pud_b)
 {
return pud_val(pud_a) == pud_val(pud_b);
-- 
2.41.0



[PATCH v4 04/13] mm/vmemmap: Allow architectures to override how vmemmap optimization works

2023-07-10 Thread Aneesh Kumar K.V
Architectures like powerpc would like to use different page table allocators
and mapping mechanisms to implement vmemmap optimization. Similar to
vmemmap_populate, allow architectures to implement
vmemmap_populate_compound_pages.

Signed-off-by: Aneesh Kumar K.V 
---
 mm/sparse-vmemmap.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a044a130405b..541b3f69a481 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -141,6 +141,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
start, end - 1);
 }
 
+#ifndef vmemmap_populate_compound_pages
 pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int 
node,
   struct vmem_altmap *altmap,
   struct page *reuse)
@@ -446,6 +447,8 @@ static int __meminit 
vmemmap_populate_compound_pages(unsigned long start_pfn,
return 0;
 }
 
+#endif
+
 struct page * __meminit __populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap)
-- 
2.41.0



[PATCH v4 03/13] mm/vmemmap: Improve vmemmap_can_optimize and allow architectures to override

2023-07-10 Thread Aneesh Kumar K.V
dax vmemmap optimization requires a minimum of 2 PAGE_SIZE area within
vmemmap such that tail page mapping can point to the second PAGE_SIZE area.
Enforce that in vmemmap_can_optimize() function.

Architectures like powerpc also want to enable vmemmap optimization
conditionally (only with radix MMU translation). Hence allow architecture
override.

Signed-off-by: Aneesh Kumar K.V 
---
 include/linux/mm.h | 27 +++
 mm/mm_init.c   |  2 +-
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2dd73e4f3d8e..1a2234ee14d2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3639,13 +3639,32 @@ void vmemmap_free(unsigned long start, unsigned long 
end,
struct vmem_altmap *altmap);
 #endif
 
+#define VMEMMAP_RESERVE_NR 2
 #ifdef CONFIG_ARCH_WANT_OPTIMIZE_VMEMMAP
-static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap,
-  struct dev_pagemap *pgmap)
+static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap,
+ struct dev_pagemap *pgmap)
 {
-   return is_power_of_2(sizeof(struct page)) &&
-   pgmap && (pgmap_vmemmap_nr(pgmap) > 1) && !altmap;
+   unsigned long nr_pages;
+   unsigned long nr_vmemmap_pages;
+
+   if (!pgmap || !is_power_of_2(sizeof(struct page)))
+   return false;
+
+   nr_pages = pgmap_vmemmap_nr(pgmap);
+   nr_vmemmap_pages = ((nr_pages * sizeof(struct page)) >> PAGE_SHIFT);
+   /*
+* For vmemmap optimization with DAX we need minimum 2 vmemmap
+* pages. See layout diagram in Documentation/mm/vmemmap_dedup.rst
+*/
+   return !altmap && (nr_vmemmap_pages > VMEMMAP_RESERVE_NR);
 }
+/*
+ * If we don't have an architecture override, use the generic rule
+ */
+#ifndef vmemmap_can_optimize
+#define vmemmap_can_optimize __vmemmap_can_optimize
+#endif
+
 #else
 static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap,
   struct dev_pagemap *pgmap)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index a1963c3322af..245ac69b66a5 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1020,7 +1020,7 @@ static inline unsigned long compound_nr_pages(struct 
vmem_altmap *altmap,
if (!vmemmap_can_optimize(altmap, pgmap))
return pgmap_vmemmap_nr(pgmap);
 
-   return 2 * (PAGE_SIZE / sizeof(struct page));
+   return VMEMMAP_RESERVE_NR * (PAGE_SIZE / sizeof(struct page));
 }
 
 static void __ref memmap_init_compound(struct page *head,
-- 
2.41.0



[PATCH v4 02/13] mm: Change pudp_huge_get_and_clear_full take vm_area_struct as arg

2023-07-10 Thread Aneesh Kumar K.V
We will use this in a later patch to do tlb flush when clearing pud entries
on powerpc. This is similar to commit 93a98695f2f9 ("mm: change
pmdp_huge_get_and_clear_full take vm_area_struct as arg")

Signed-off-by: Aneesh Kumar K.V 
---
 include/linux/pgtable.h | 4 ++--
 mm/debug_vm_pgtable.c   | 2 +-
 mm/huge_memory.c| 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index cf13f8d938a8..6fd9b2831338 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -450,11 +450,11 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct 
vm_area_struct *vma,
 #endif
 
 #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
-static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
+static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
unsigned long address, pud_t *pudp,
int full)
 {
-   return pudp_huge_get_and_clear(mm, address, pudp);
+   return pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
 }
 #endif
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index ee119e33fef1..ee2c4c1dcfc8 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -385,7 +385,7 @@ static void __init pud_advanced_tests(struct 
pgtable_debug_args *args)
WARN_ON(!(pud_write(pud) && pud_dirty(pud)));
 
 #ifndef __PAGETABLE_PMD_FOLDED
-   pudp_huge_get_and_clear_full(args->mm, vaddr, args->pudp, 1);
+   pudp_huge_get_and_clear_full(args->vma, vaddr, args->pudp, 1);
pud = READ_ONCE(*args->pudp);
WARN_ON(!pud_none(pud));
 #endif /* __PAGETABLE_PMD_FOLDED */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index eb3678360b97..ba20cef681a4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1981,7 +1981,7 @@ int zap_huge_pud(struct mmu_gather *tlb, struct 
vm_area_struct *vma,
if (!ptl)
return 0;
 
-   pudp_huge_get_and_clear_full(tlb->mm, addr, pud, tlb->fullmm);
+   pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm);
tlb_remove_pud_tlb_entry(tlb, pud, addr);
if (vma_is_special_huge(vma)) {
spin_unlock(ptl);
-- 
2.41.0



[PATCH v4 01/13] mm/hugepage pud: Allow arch-specific helper function to check huge page pud support

2023-07-10 Thread Aneesh Kumar K.V
Architectures like powerpc would like to enable transparent huge page pud
support only with radix translation. To support that add
has_transparent_pud_hugepage() helper that architectures can override.

Signed-off-by: Aneesh Kumar K.V 
---
 drivers/nvdimm/pfn_devs.c | 2 +-
 include/linux/pgtable.h   | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index af7d9301520c..18ad315581ca 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -100,7 +100,7 @@ static unsigned long *nd_pfn_supported_alignments(unsigned 
long *alignments)
 
if (has_transparent_hugepage()) {
alignments[1] = HPAGE_PMD_SIZE;
-   if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
+   if (has_transparent_pud_hugepage())
alignments[2] = HPAGE_PUD_SIZE;
}
 
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 5063b482e34f..cf13f8d938a8 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1499,6 +1499,9 @@ typedef unsigned int pgtbl_mod_mask;
 #define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE)
 #endif
 
+#ifndef has_transparent_pud_hugepage
+#define has_transparent_pud_hugepage() 
IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+#endif
 /*
  * On some architectures it depends on the mm if the p4d/pud or pmd
  * layer of the page table hierarchy is folded or not.
-- 
2.41.0



[PATCH v4 00/13] Add support for DAX vmemmap optimization for ppc64

2023-07-10 Thread Aneesh Kumar K.V
This patch series implements changes required to support DAX vmemmap
optimization for ppc64. The vmemmap optimization is only enabled with radix MMU
translation and 1GB PUD mapping with 64K page size. The patch series also split
hugetlb vmemmap optimization as a separate Kconfig variable so that
architectures can enable DAX vmemmap optimization without enabling hugetlb
vmemmap optimization. This should enable architectures like arm64 to enable DAX
vmemmap optimization while they can't enable hugetlb vmemmap optimization. More
details of the same are in patch "mm/vmemmap optimization: Split hugetlb and
devdax vmemmap optimization"

Changes from v3:
* Rebase to latest linus tree
* Build fix with SPARSEMEM_VMEMMAP disabled
* Add hash__pud_same outside THP Kconfig

Changes from v2:
* Rebase to latest linus tree
* Address review feedback

Changes from V1:
* Fix make htmldocs warning
* Fix vmemmap allocation bugs with different alignment values.
* Correctly check for section validity before we free vmemmap area



Aneesh Kumar K.V (13):
  mm/hugepage pud: Allow arch-specific helper function to check huge
page pud support
  mm: Change pudp_huge_get_and_clear_full take vm_area_struct as arg
  mm/vmemmap: Improve vmemmap_can_optimize and allow architectures to
override
  mm/vmemmap: Allow architectures to override how vmemmap optimization
works
  mm: Add __HAVE_ARCH_PUD_SAME similar to __HAVE_ARCH_P4D_SAME
  mm/huge pud: Use transparent huge pud helpers only with
CONFIG_TRANSPARENT_HUGEPAGE
  mm/vmemmap optimization: Split hugetlb and devdax vmemmap optimization
  powerpc/mm/trace: Convert trace event to trace event class
  powerpc/book3s64/mm: Enable transparent pud hugepage
  powerpc/book3s64/vmemmap: Switch radix to use a different vmemmap
handling function
  powerpc/book3s64/radix: Add support for vmemmap optimization for radix
  powerpc/book3s64/radix: Remove mmu_vmemmap_psize
  powerpc/book3s64/radix: Add debug message to give more details of
vmemmap allocation

 Documentation/mm/vmemmap_dedup.rst|   1 +
 Documentation/powerpc/index.rst   |   1 +
 Documentation/powerpc/vmemmap_dedup.rst   | 101 +++
 arch/loongarch/Kconfig|   2 +-
 arch/powerpc/Kconfig  |   1 +
 arch/powerpc/include/asm/book3s/64/hash.h |   9 +
 arch/powerpc/include/asm/book3s/64/pgtable.h  | 155 -
 arch/powerpc/include/asm/book3s/64/radix.h|  47 ++
 .../include/asm/book3s/64/tlbflush-radix.h|   2 +
 arch/powerpc/include/asm/book3s/64/tlbflush.h |   8 +
 arch/powerpc/include/asm/pgtable.h|   4 +
 arch/powerpc/mm/book3s64/hash_pgtable.c   |   2 +-
 arch/powerpc/mm/book3s64/pgtable.c|  78 +++
 arch/powerpc/mm/book3s64/radix_pgtable.c  | 573 --
 arch/powerpc/mm/book3s64/radix_tlb.c  |   7 +
 arch/powerpc/mm/init_64.c |  37 +-
 arch/powerpc/platforms/Kconfig.cputype|   1 +
 arch/riscv/Kconfig|   2 +-
 arch/s390/Kconfig |   2 +-
 arch/x86/Kconfig  |   3 +-
 drivers/nvdimm/pfn_devs.c |   2 +-
 fs/Kconfig|   2 +-
 include/linux/mm.h|  29 +-
 include/linux/pgtable.h   |  11 +-
 include/trace/events/thp.h|  33 +-
 mm/Kconfig|   5 +-
 mm/debug_vm_pgtable.c |   2 +-
 mm/huge_memory.c  |   2 +-
 mm/mm_init.c  |   2 +-
 mm/mremap.c   |   2 +-
 mm/sparse-vmemmap.c   |   3 +
 31 files changed, 1047 insertions(+), 82 deletions(-)
 create mode 100644 Documentation/powerpc/vmemmap_dedup.rst

-- 
2.41.0



Re: [PATCH 09/17] auxdisplay: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers

2023-07-10 Thread Thomas Zimmermann

Hi

Am 10.07.23 um 16:24 schrieb Miguel Ojeda:

On Mon, Jul 10, 2023 at 3:01 PM Thomas Zimmermann  wrote:


The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct
fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do
not set it.


`framebuffer_alloc()` does indeed use `kzalloc()`, but the docs do not
mention the zeroing. Should that guarantee be documented?


I'll append a patch to the series that documents this.




Flags should signal differences from the default values. After cleaning
up all occurences of FBINFO_FLAG_DEFAULT, the token can be removed.


occurences -> occurrences

can -> will maybe? Since the intention of the patch series is to
remove it (them) altogether).


Sure.

Best regards
Thomas



Thanks!

Cheers,
Miguel


--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Frankenstrasse 146, 90461 Nuernberg, Germany
GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman
HRB 36809 (AG Nuernberg)


OpenPGP_signature
Description: OpenPGP digital signature


Re: [PATCH 09/17] auxdisplay: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers

2023-07-10 Thread Miguel Ojeda
On Mon, Jul 10, 2023 at 3:01 PM Thomas Zimmermann  wrote:
>
> The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct
> fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do
> not set it.

`framebuffer_alloc()` does indeed use `kzalloc()`, but the docs do not
mention the zeroing. Should that guarantee be documented?

> Flags should signal differences from the default values. After cleaning
> up all occurences of FBINFO_FLAG_DEFAULT, the token can be removed.

occurences -> occurrences

can -> will maybe? Since the intention of the patch series is to
remove it (them) altogether).

Thanks!

Cheers,
Miguel


Re: [PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag

2023-07-10 Thread John Paul Adrian Glaubitz
Hi!

On Mon, 2023-07-10 at 16:04 +0200, Thomas Zimmermann wrote:
> > > I won't argue with that, but the flag itself is wrong.
> > > FBINFO_FLAG_DEFAULT is/was for struct fb_info.flags. You have struct
> > > fb_videomode.flag. The valid flags for this field are at [1]. If
> > > anything, the field could be initialized to FB_MODE_IS_UNKNOWN, which
> > > has the same value.
> > > 
> > > [1] https://elixir.bootlin.com/linux/latest/source/include/linux/fb.h#L681
> > 
> > FB_MODE_IS_UNKNOWN sounds very reasonable to me. Would you agree using that 
> > instead?
> 
> Sure, I'll update the patch accordingly.

Thanks! I'll ack the updated patch.

Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer
`. `'   Physicist
  `-GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913


Re: [PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag

2023-07-10 Thread Thomas Zimmermann

Hi

Am 10.07.23 um 15:59 schrieb John Paul Adrian Glaubitz:

Hi Thomas!

On Mon, 2023-07-10 at 15:52 +0200, Thomas Zimmermann wrote:

I would argue that the current code is more readable that your proposed change.

I agree that it's a no-op, but code is not just about functionality but also
readability, isn't it?


I won't argue with that, but the flag itself is wrong.
FBINFO_FLAG_DEFAULT is/was for struct fb_info.flags. You have struct
fb_videomode.flag. The valid flags for this field are at [1]. If
anything, the field could be initialized to FB_MODE_IS_UNKNOWN, which
has the same value.

[1] https://elixir.bootlin.com/linux/latest/source/include/linux/fb.h#L681


FB_MODE_IS_UNKNOWN sounds very reasonable to me. Would you agree using that 
instead?


Sure, I'll update the patch accordingly.

Best regards
Thomas





Also, I prefer "sh:" as the architecture prefix, not "arch/sh:".


Ok.


Thanks.

Adrian



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Frankenstrasse 146, 90461 Nuernberg, Germany
GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman
HRB 36809 (AG Nuernberg)


OpenPGP_signature
Description: OpenPGP digital signature


Re: [PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag

2023-07-10 Thread John Paul Adrian Glaubitz
Hi Thomas!

On Mon, 2023-07-10 at 15:52 +0200, Thomas Zimmermann wrote:
> > I would argue that the current code is more readable that your proposed 
> > change.
> > 
> > I agree that it's a no-op, but code is not just about functionality but also
> > readability, isn't it?
> 
> I won't argue with that, but the flag itself is wrong. 
> FBINFO_FLAG_DEFAULT is/was for struct fb_info.flags. You have struct 
> fb_videomode.flag. The valid flags for this field are at [1]. If 
> anything, the field could be initialized to FB_MODE_IS_UNKNOWN, which 
> has the same value.
> 
> [1] https://elixir.bootlin.com/linux/latest/source/include/linux/fb.h#L681

FB_MODE_IS_UNKNOWN sounds very reasonable to me. Would you agree using that 
instead?

> > 
> > Also, I prefer "sh:" as the architecture prefix, not "arch/sh:".
> 
> Ok.

Thanks.

Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer
`. `'   Physicist
  `-GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913


Re: [PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag

2023-07-10 Thread Thomas Zimmermann

Hi

Am 10.07.23 um 15:42 schrieb John Paul Adrian Glaubitz:

Hi Thomas!

On Mon, 2023-07-10 at 14:50 +0200, Thomas Zimmermann wrote:

FBINFO_FLAG_DEFAULT is a flag for a framebuffer in struct fb_info.
Flags for videomodes are prefixed with FB_MODE_. FBINFO_FLAG_DEFAULT
is 0 and the static declaration already clears the memory area of
sh7763fb_videomode. So remove the assignment.

Signed-off-by: Thomas Zimmermann 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: John Paul Adrian Glaubitz 
---
  arch/sh/boards/mach-sh7763rdp/setup.c | 1 -
  1 file changed, 1 deletion(-)

diff --git a/arch/sh/boards/mach-sh7763rdp/setup.c 
b/arch/sh/boards/mach-sh7763rdp/setup.c
index 97e715e4e9b3..345f2b76c85a 100644
--- a/arch/sh/boards/mach-sh7763rdp/setup.c
+++ b/arch/sh/boards/mach-sh7763rdp/setup.c
@@ -119,7 +119,6 @@ static struct fb_videomode sh7763fb_videomode = {
.vsync_len = 1,
.sync = 0,
.vmode = FB_VMODE_NONINTERLACED,
-   .flag = FBINFO_FLAG_DEFAULT,
  };
  
  static struct sh7760fb_platdata sh7763fb_def_pdata = {


I would argue that the current code is more readable that your proposed change.

I agree that it's a no-op, but code is not just about functionality but also
readability, isn't it?


I won't argue with that, but the flag itself is wrong. 
FBINFO_FLAG_DEFAULT is/was for struct fb_info.flags. You have struct 
fb_videomode.flag. The valid flags for this field are at [1]. If 
anything, the field could be initialized to FB_MODE_IS_UNKNOWN, which 
has the same value.


[1] https://elixir.bootlin.com/linux/latest/source/include/linux/fb.h#L681



Also, I prefer "sh:" as the architecture prefix, not "arch/sh:".


Ok.

Best regards
Thomas


Thanks,
Adrian



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Frankenstrasse 146, 90461 Nuernberg, Germany
GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman
HRB 36809 (AG Nuernberg)


OpenPGP_signature
Description: OpenPGP digital signature


Re: [PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag

2023-07-10 Thread John Paul Adrian Glaubitz
Hi Thomas!

On Mon, 2023-07-10 at 14:50 +0200, Thomas Zimmermann wrote:
> FBINFO_FLAG_DEFAULT is a flag for a framebuffer in struct fb_info.
> Flags for videomodes are prefixed with FB_MODE_. FBINFO_FLAG_DEFAULT
> is 0 and the static declaration already clears the memory area of
> sh7763fb_videomode. So remove the assignment.
> 
> Signed-off-by: Thomas Zimmermann 
> Cc: Yoshinori Sato 
> Cc: Rich Felker 
> Cc: John Paul Adrian Glaubitz 
> ---
>  arch/sh/boards/mach-sh7763rdp/setup.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/arch/sh/boards/mach-sh7763rdp/setup.c 
> b/arch/sh/boards/mach-sh7763rdp/setup.c
> index 97e715e4e9b3..345f2b76c85a 100644
> --- a/arch/sh/boards/mach-sh7763rdp/setup.c
> +++ b/arch/sh/boards/mach-sh7763rdp/setup.c
> @@ -119,7 +119,6 @@ static struct fb_videomode sh7763fb_videomode = {
>   .vsync_len = 1,
>   .sync = 0,
>   .vmode = FB_VMODE_NONINTERLACED,
> - .flag = FBINFO_FLAG_DEFAULT,
>  };
>  
>  static struct sh7760fb_platdata sh7763fb_def_pdata = {

I would argue that the current code is more readable than your proposed change.

I agree that it's a no-op, but code is not just about functionality but also
readability, isn't it?

Also, I prefer "sh:" as the architecture prefix, not "arch/sh:".

Thanks,
Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer
`. `'   Physicist
  `-GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913


Re: [PATCH 10/17] hid/picolcd: Remove flag FBINFO_FLAG_DEFAULT from fbdev driver

2023-07-10 Thread Benjamin Tissoires
On Mon, Jul 10, 2023 at 3:01 PM Thomas Zimmermann  wrote:
>
> The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct
> fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do
> not set it.
>
> Flags should signal differences from the default values. After cleaning
> up all occurences of FBINFO_FLAG_DEFAULT, the token can be removed.
>
> Signed-off-by: Thomas Zimmermann 
> Cc: "Bruno Prémont" 
> Cc: Jiri Kosina 
> Cc: Benjamin Tissoires 

Acked-by: Benjamin Tissoires 

Feel free to take this through the DRI tree (or any other that handles
FB) with the rest of the series if you want.

Cheers,
Benjamin

> ---
>  drivers/hid/hid-picolcd_fb.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/drivers/hid/hid-picolcd_fb.c b/drivers/hid/hid-picolcd_fb.c
> index dabcd054dad9..d726aaafb146 100644
> --- a/drivers/hid/hid-picolcd_fb.c
> +++ b/drivers/hid/hid-picolcd_fb.c
> @@ -527,7 +527,6 @@ int picolcd_init_framebuffer(struct picolcd_data *data)
> info->var = picolcdfb_var;
> info->fix = picolcdfb_fix;
> info->fix.smem_len   = PICOLCDFB_SIZE*8;
> -   info->flags = FBINFO_FLAG_DEFAULT;
>
> fbdata = info->par;
> spin_lock_init(>lock);
> --
> 2.41.0
>



[PATCH 05/17] fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags
has been allocated to zero by framebuffer_alloc(). So do not set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Helge Deller 
Cc: Nicolas Ferre 
Cc: Benjamin Herrenschmidt 
Cc: Ferenc Bakonyi 
Cc: "K. Y. Srinivasan" 
Cc: Haiyang Zhang 
Cc: Wei Liu 
Cc: Dexuan Cui 
Cc: Antonino Daplas 
Cc: Maik Broemme 
Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Kristoffer Ericson 
Cc: Hans de Goede 
Cc: Steve Glendinning 
Cc: Bernie Thompson 
Cc: Florian Tobias Schandinat 
---
 drivers/video/fbdev/amifb.c  | 5 ++---
 drivers/video/fbdev/asiliantfb.c | 1 -
 drivers/video/fbdev/atmel_lcdfb.c| 2 +-
 drivers/video/fbdev/aty/atyfb_base.c | 3 +--
 drivers/video/fbdev/aty/radeon_base.c| 3 +--
 drivers/video/fbdev/bw2.c| 1 -
 drivers/video/fbdev/carminefb.c  | 1 -
 drivers/video/fbdev/cg14.c   | 2 +-
 drivers/video/fbdev/cg3.c| 1 -
 drivers/video/fbdev/cg6.c| 2 +-
 drivers/video/fbdev/chipsfb.c| 1 -
 drivers/video/fbdev/cirrusfb.c   | 3 +--
 drivers/video/fbdev/clps711x-fb.c| 1 -
 drivers/video/fbdev/cobalt_lcdfb.c   | 1 -
 drivers/video/fbdev/ep93xx-fb.c  | 1 -
 drivers/video/fbdev/ffb.c| 3 +--
 drivers/video/fbdev/fm2fb.c  | 1 -
 drivers/video/fbdev/gbefb.c  | 1 -
 drivers/video/fbdev/geode/gx1fb_core.c   | 1 -
 drivers/video/fbdev/geode/gxfb_core.c| 1 -
 drivers/video/fbdev/geode/lxfb_core.c| 1 -
 drivers/video/fbdev/grvga.c  | 2 +-
 drivers/video/fbdev/hgafb.c  | 2 +-
 drivers/video/fbdev/hitfb.c  | 2 +-
 drivers/video/fbdev/hyperv_fb.c  | 2 --
 drivers/video/fbdev/i740fb.c | 2 +-
 drivers/video/fbdev/i810/i810_main.c | 4 ++--
 drivers/video/fbdev/imsttfb.c| 3 +--
 drivers/video/fbdev/intelfb/intelfbdrv.c | 4 ++--
 drivers/video/fbdev/kyro/fbdev.c | 1 -
 drivers/video/fbdev/leo.c| 1 -
 drivers/video/fbdev/mb862xx/mb862xxfbdrv.c   | 2 +-
 drivers/video/fbdev/mmp/fb/mmpfb.c   | 2 +-
 drivers/video/fbdev/neofb.c  | 2 +-
 drivers/video/fbdev/nvidia/nvidia.c  | 4 ++--
 drivers/video/fbdev/offb.c   | 2 +-
 drivers/video/fbdev/p9100.c  | 1 -
 drivers/video/fbdev/platinumfb.c | 1 -
 drivers/video/fbdev/pm2fb.c  | 3 +--
 drivers/video/fbdev/pm3fb.c  | 3 +--
 drivers/video/fbdev/pmag-aa-fb.c | 1 -
 drivers/video/fbdev/pmag-ba-fb.c | 1 -
 drivers/video/fbdev/pmagb-b-fb.c | 1 -
 drivers/video/fbdev/ps3fb.c  | 2 +-
 drivers/video/fbdev/pvr2fb.c | 2 +-
 drivers/video/fbdev/pxa168fb.c   | 2 +-
 drivers/video/fbdev/q40fb.c  | 1 -
 drivers/video/fbdev/riva/fbdev.c | 3 +--
 drivers/video/fbdev/s1d13xxxfb.c | 4 ++--
 drivers/video/fbdev/savage/savagefb_driver.c | 3 +--
 drivers/video/fbdev/simplefb.c   | 1 -
 drivers/video/fbdev/sis/sis_main.c   | 3 +--
 drivers/video/fbdev/skeletonfb.c | 2 +-
 drivers/video/fbdev/smscufx.c| 2 +-
 drivers/video/fbdev/sstfb.c  | 1 -
 drivers/video/fbdev/sunxvr1000.c | 1 -
 drivers/video/fbdev/sunxvr2500.c | 1 -
 drivers/video/fbdev/sunxvr500.c  | 1 -
 drivers/video/fbdev/tcx.c| 1 -
 drivers/video/fbdev/tdfxfb.c | 2 +-
 drivers/video/fbdev/tgafb.c  | 2 +-
 drivers/video/fbdev/tridentfb.c  | 2 +-
 drivers/video/fbdev/udlfb.c  | 2 +-
 drivers/video/fbdev/via/viafbdev.c   | 2 +-
 64 files changed, 41 insertions(+), 81 deletions(-)

diff --git a/drivers/video/fbdev/amifb.c b/drivers/video/fbdev/amifb.c
index d88265dbebf4..cea782283b9c 100644
--- a/drivers/video/fbdev/amifb.c
+++ b/drivers/video/fbdev/amifb.c
@@ -2427,7 +2427,7 @@ static int amifb_set_par(struct fb_info *info)
info->fix.ywrapstep = 1;
info->fix.xpanstep = 0;
info->fix.ypanstep = 0;
-   info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YWRAP |
+   info->flags = FBINFO_HWACCEL_YWRAP |
FBINFO_READS_FAST; /* override SCROLL_REDRAW */
} else {
info->fix.ywrapstep = 0;
@@ -2436,7 +2436,7 @@ static int amifb_set_par(struct fb_info *info)
else
info->fix.xpanstep = 16 << maxfmode;
info->fix.ypanstep = 1;
-   info->flags = FBINFO_DEFAULT | 

[PATCH 17/17] fbdev: Remove FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT

2023-07-10 Thread Thomas Zimmermann
Remove the unused flags FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT. No
functional changes.

Signed-off-by: Thomas Zimmermann 
Cc: Helge Deller 
---
 include/linux/fb.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 1d5c13f34b09..43458f582f35 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -383,7 +383,6 @@ struct fb_tile_ops {
 #endif /* CONFIG_FB_TILEBLITTING */
 
 /* FBINFO_* = fb_info.flags bit flags */
-#define FBINFO_DEFAULT 0
 #define FBINFO_HWACCEL_DISABLED0x0002
/* When FBINFO_HWACCEL_DISABLED is set:
 *  Hardware acceleration is turned off.  Software implementations
@@ -504,8 +503,6 @@ struct fb_info {
bool skip_vt_switch; /* no VT switch on suspend/resume required */
 };
 
-#define FBINFO_FLAG_DEFAULTFBINFO_DEFAULT
-
 /* This will go away
  * fbset currently hacks in FB_ACCELF_TEXT into var.accel_flags
  * when it wants to turn the acceleration engine on.  This is
-- 
2.41.0



[PATCH 09/17] auxdisplay: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct
fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do
not set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Miguel Ojeda 
Cc: Robin van der Gracht 
---
 drivers/auxdisplay/cfag12864bfb.c | 1 -
 drivers/auxdisplay/ht16k33.c  | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/auxdisplay/cfag12864bfb.c 
b/drivers/auxdisplay/cfag12864bfb.c
index c2cab7e2b126..729845bcc803 100644
--- a/drivers/auxdisplay/cfag12864bfb.c
+++ b/drivers/auxdisplay/cfag12864bfb.c
@@ -79,7 +79,6 @@ static int cfag12864bfb_probe(struct platform_device *device)
info->var = cfag12864bfb_var;
info->pseudo_palette = NULL;
info->par = NULL;
-   info->flags = FBINFO_FLAG_DEFAULT;
 
if (register_framebuffer(info) < 0)
goto fballoced;
diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c
index edaf92b7ea77..df3f37651e45 100644
--- a/drivers/auxdisplay/ht16k33.c
+++ b/drivers/auxdisplay/ht16k33.c
@@ -646,7 +646,6 @@ static int ht16k33_fbdev_probe(struct device *dev, struct 
ht16k33_priv *priv,
fbdev->info->var = ht16k33_fb_var;
fbdev->info->bl_dev = bl;
fbdev->info->pseudo_palette = NULL;
-   fbdev->info->flags = FBINFO_FLAG_DEFAULT;
fbdev->info->par = priv;
 
err = register_framebuffer(fbdev->info);
-- 
2.41.0



[PATCH 13/17] fbdev: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct
fbinfo.flags has been allocated to zero by kzalloc(). So do not
set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Helge Deller 
---
 drivers/video/fbdev/amba-clcd.c | 1 -
 drivers/video/fbdev/matrox/matroxfb_crtc2.c | 5 ++---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c
index e45338227be6..24d89e6fb780 100644
--- a/drivers/video/fbdev/amba-clcd.c
+++ b/drivers/video/fbdev/amba-clcd.c
@@ -461,7 +461,6 @@ static int clcdfb_register(struct clcd_fb *fb)
}
 
fb->fb.fbops= _ops;
-   fb->fb.flags= FBINFO_FLAG_DEFAULT;
fb->fb.pseudo_palette   = fb->cmap;
 
strncpy(fb->fb.fix.id, clcd_name, sizeof(fb->fb.fix.id));
diff --git a/drivers/video/fbdev/matrox/matroxfb_crtc2.c 
b/drivers/video/fbdev/matrox/matroxfb_crtc2.c
index 7655afa3fd50..372197c124de 100644
--- a/drivers/video/fbdev/matrox/matroxfb_crtc2.c
+++ b/drivers/video/fbdev/matrox/matroxfb_crtc2.c
@@ -603,9 +603,8 @@ static int matroxfb_dh_regit(const struct matrox_fb_info 
*minfo,
void* oldcrtc2;
 
m2info->fbcon.fbops = _dh_ops;
-   m2info->fbcon.flags = FBINFO_FLAG_DEFAULT;
-   m2info->fbcon.flags |= FBINFO_HWACCEL_XPAN |
-  FBINFO_HWACCEL_YPAN;
+   m2info->fbcon.flags = FBINFO_HWACCEL_XPAN |
+ FBINFO_HWACCEL_YPAN;
m2info->fbcon.pseudo_palette = m2info->cmap;
fb_alloc_cmap(>fbcon.cmap, 256, 1);
 
-- 
2.41.0



[PATCH 16/17] fbdev/pxafb: Remove flag FBINFO_FLAG_DEFAULT

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct
fbinfo.flags has been allocated to zero by devm_kzalloc(). So do not
set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Helge Deller 
---
 drivers/video/fbdev/pxafb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c
index c8c4677d06b4..beffb0602a2c 100644
--- a/drivers/video/fbdev/pxafb.c
+++ b/drivers/video/fbdev/pxafb.c
@@ -888,7 +888,6 @@ static void init_pxafb_overlay(struct pxafb_info *fbi, 
struct pxafb_layer *ofb,
ofb->fb.var.vmode   = FB_VMODE_NONINTERLACED;
 
ofb->fb.fbops   = _fb_ops;
-   ofb->fb.flags   = FBINFO_FLAG_DEFAULT;
ofb->fb.node= -1;
ofb->fb.pseudo_palette  = NULL;
 
-- 
2.41.0



[PATCH 14/17] fbdev: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct
fbinfo.flags has been allocated to zero by framebuffer_alloc(). So
do not set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Jaya Kumar 
Cc: Helge Deller 
Cc: Peter Jones 
Cc: Sascha Hauer 
Cc: Pengutronix Kernel Team 
Cc: Shawn Guo 
Cc: Fabio Estevam 
Cc: NXP Linux Team 
Cc: Maik Broemme 
Cc: Jingoo Han 
Cc: Sudip Mukherjee 
Cc: Teddy Wang 
Cc: Michal Januszewski 
---
 drivers/video/fbdev/arcfb.c| 1 -
 drivers/video/fbdev/aty/aty128fb.c | 1 -
 drivers/video/fbdev/broadsheetfb.c | 2 +-
 drivers/video/fbdev/da8xx-fb.c | 1 -
 drivers/video/fbdev/efifb.c| 1 -
 drivers/video/fbdev/goldfishfb.c   | 1 -
 drivers/video/fbdev/gxt4500.c  | 3 +--
 drivers/video/fbdev/hecubafb.c | 2 +-
 drivers/video/fbdev/imxfb.c| 3 +--
 drivers/video/fbdev/intelfb/intelfbdrv.c   | 1 -
 drivers/video/fbdev/metronomefb.c  | 2 +-
 drivers/video/fbdev/mx3fb.c| 1 -
 drivers/video/fbdev/omap/omapfb_main.c | 1 -
 drivers/video/fbdev/omap2/omapfb/omapfb-main.c | 1 -
 drivers/video/fbdev/s3c-fb.c   | 1 -
 drivers/video/fbdev/sh_mobile_lcdcfb.c | 2 --
 drivers/video/fbdev/sis/sis_main.c | 2 --
 drivers/video/fbdev/sm501fb.c  | 2 +-
 drivers/video/fbdev/sm712fb.c  | 1 -
 drivers/video/fbdev/uvesafb.c  | 3 +--
 drivers/video/fbdev/vesafb.c   | 2 +-
 drivers/video/fbdev/vfb.c  | 1 -
 drivers/video/fbdev/vga16fb.c  | 2 +-
 drivers/video/fbdev/xen-fbfront.c  | 2 +-
 24 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c
index 9aaea3be8281..cff11cb04a55 100644
--- a/drivers/video/fbdev/arcfb.c
+++ b/drivers/video/fbdev/arcfb.c
@@ -546,7 +546,6 @@ static int arcfb_probe(struct platform_device *dev)
par->c2io_addr = c2io_addr;
par->cslut[0] = 0x00;
par->cslut[1] = 0x06;
-   info->flags = FBINFO_FLAG_DEFAULT;
spin_lock_init(>lock);
if (irq) {
par->irq = irq;
diff --git a/drivers/video/fbdev/aty/aty128fb.c 
b/drivers/video/fbdev/aty/aty128fb.c
index 2d9320a52e51..b44fc78ccd4f 100644
--- a/drivers/video/fbdev/aty/aty128fb.c
+++ b/drivers/video/fbdev/aty/aty128fb.c
@@ -1927,7 +1927,6 @@ static int aty128_init(struct pci_dev *pdev, const struct 
pci_device_id *ent)
 
/* fill in info */
info->fbops = _ops;
-   info->flags = FBINFO_FLAG_DEFAULT;
 
par->lcd_on = default_lcd_on;
par->crt_on = default_crt_on;
diff --git a/drivers/video/fbdev/broadsheetfb.c 
b/drivers/video/fbdev/broadsheetfb.c
index cb725a91b6bb..e51e14c29c55 100644
--- a/drivers/video/fbdev/broadsheetfb.c
+++ b/drivers/video/fbdev/broadsheetfb.c
@@ -1069,7 +1069,7 @@ static int broadsheetfb_probe(struct platform_device *dev)
 
mutex_init(>io_lock);
 
-   info->flags = FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB;
+   info->flags = FBINFO_VIRTFB;
 
info->fbdefio = _defio;
fb_deferred_io_init(info);
diff --git a/drivers/video/fbdev/da8xx-fb.c b/drivers/video/fbdev/da8xx-fb.c
index 60cd1286370f..988dedcf6be8 100644
--- a/drivers/video/fbdev/da8xx-fb.c
+++ b/drivers/video/fbdev/da8xx-fb.c
@@ -1463,7 +1463,6 @@ static int fb_probe(struct platform_device *device)
da8xx_fb_var.bits_per_pixel = lcd_cfg->bpp;
 
/* Initialize fbinfo */
-   da8xx_fb_info->flags = FBINFO_FLAG_DEFAULT;
da8xx_fb_info->fix = da8xx_fb_fix;
da8xx_fb_info->var = da8xx_fb_var;
da8xx_fb_info->fbops = _fb_ops;
diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c
index 3d7be69ab593..3391c8e84210 100644
--- a/drivers/video/fbdev/efifb.c
+++ b/drivers/video/fbdev/efifb.c
@@ -555,7 +555,6 @@ static int efifb_probe(struct platform_device *dev)
info->fbops = _ops;
info->var = efifb_defined;
info->fix = efifb_fix;
-   info->flags = FBINFO_FLAG_DEFAULT;
 
orientation = drm_get_panel_orientation_quirk(efifb_defined.xres,
  efifb_defined.yres);
diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c
index 6fa2108fd912..ef2528c3faa9 100644
--- a/drivers/video/fbdev/goldfishfb.c
+++ b/drivers/video/fbdev/goldfishfb.c
@@ -212,7 +212,6 @@ static int goldfish_fb_probe(struct platform_device *pdev)
height = readl(fb->reg_base + FB_GET_HEIGHT);
 
fb->fb.fbops= _fb_ops;
-   fb->fb.flags= FBINFO_FLAG_DEFAULT;
fb->fb.pseudo_palette   = fb->cmap;
fb->fb.fix.type = FB_TYPE_PACKED_PIXELS;

[PATCH 10/17] hid/picolcd: Remove flag FBINFO_FLAG_DEFAULT from fbdev driver

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct
fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do
not set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: "Bruno Prémont" 
Cc: Jiri Kosina 
Cc: Benjamin Tissoires 
---
 drivers/hid/hid-picolcd_fb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/hid/hid-picolcd_fb.c b/drivers/hid/hid-picolcd_fb.c
index dabcd054dad9..d726aaafb146 100644
--- a/drivers/hid/hid-picolcd_fb.c
+++ b/drivers/hid/hid-picolcd_fb.c
@@ -527,7 +527,6 @@ int picolcd_init_framebuffer(struct picolcd_data *data)
info->var = picolcdfb_var;
info->fix = picolcdfb_fix;
info->fix.smem_len   = PICOLCDFB_SIZE*8;
-   info->flags = FBINFO_FLAG_DEFAULT;
 
fbdata = info->par;
spin_lock_init(>lock);
-- 
2.41.0



[PATCH 15/17] fbdev/atafb: Remove flag FBINFO_FLAG_DEFAULT

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct
fbinfo.flags has been allocated to zero by a static declaration. So do
not set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Helge Deller 
---
 drivers/video/fbdev/atafb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/video/fbdev/atafb.c b/drivers/video/fbdev/atafb.c
index 2bc4089865e6..c4a420b791b9 100644
--- a/drivers/video/fbdev/atafb.c
+++ b/drivers/video/fbdev/atafb.c
@@ -3112,7 +3112,6 @@ static int __init atafb_probe(struct platform_device 
*pdev)
 #ifdef ATAFB_FALCON
fb_info.pseudo_palette = current_par.hw.falcon.pseudo_palette;
 #endif
-   fb_info.flags = FBINFO_FLAG_DEFAULT;
 
if (!fb_find_mode(_info.var, _info, mode_option, atafb_modedb,
  NUM_TOTAL_MODES, _modedb[defmode],
-- 
2.41.0



[PATCH 03/17] fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags
has been allocated to zero by kzalloc(). So do not set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Helge Deller 
Cc: Russell King 
---
 drivers/video/fbdev/controlfb.c   | 2 +-
 drivers/video/fbdev/cyber2000fb.c | 2 +-
 drivers/video/fbdev/valkyriefb.c  | 1 -
 drivers/video/fbdev/vermilion/vermilion.c | 2 +-
 drivers/video/fbdev/vt8500lcdfb.c | 3 +--
 5 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c
index 82eeb139c4eb..717134c141ff 100644
--- a/drivers/video/fbdev/controlfb.c
+++ b/drivers/video/fbdev/controlfb.c
@@ -775,7 +775,7 @@ static void __init control_init_info(struct fb_info *info, 
struct fb_info_contro
info->par = >par;
info->fbops = _ops;
info->pseudo_palette = p->pseudo_palette;
-info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
+   info->flags = FBINFO_HWACCEL_YPAN;
info->screen_base = p->frame_buffer + CTRLFB_OFF;
 
fb_alloc_cmap(>cmap, 256, 0);
diff --git a/drivers/video/fbdev/cyber2000fb.c 
b/drivers/video/fbdev/cyber2000fb.c
index 38c0a6866d76..98ea56a9abf1 100644
--- a/drivers/video/fbdev/cyber2000fb.c
+++ b/drivers/video/fbdev/cyber2000fb.c
@@ -1459,7 +1459,7 @@ static struct cfb_info *cyberpro_alloc_fb_info(unsigned 
int id, char *name)
cfb->fb.var.accel_flags = FB_ACCELF_TEXT;
 
cfb->fb.fbops   = _ops;
-   cfb->fb.flags   = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
+   cfb->fb.flags   = FBINFO_HWACCEL_YPAN;
cfb->fb.pseudo_palette  = cfb->pseudo_palette;
 
spin_lock_init(>reg_b0_lock);
diff --git a/drivers/video/fbdev/valkyriefb.c b/drivers/video/fbdev/valkyriefb.c
index b166b7cfe0e5..fd4488777032 100644
--- a/drivers/video/fbdev/valkyriefb.c
+++ b/drivers/video/fbdev/valkyriefb.c
@@ -535,7 +535,6 @@ static int __init valkyrie_init_info(struct fb_info *info,
 {
info->fbops = _ops;
info->screen_base = p->frame_buffer + 0x1000;
-   info->flags = FBINFO_DEFAULT;
info->pseudo_palette = p->pseudo_palette;
info->par = >par;
return fb_alloc_cmap(>cmap, 256, 0);
diff --git a/drivers/video/fbdev/vermilion/vermilion.c 
b/drivers/video/fbdev/vermilion/vermilion.c
index 32e74e02a02f..71584c775efd 100644
--- a/drivers/video/fbdev/vermilion/vermilion.c
+++ b/drivers/video/fbdev/vermilion/vermilion.c
@@ -477,7 +477,7 @@ static int vml_pci_probe(struct pci_dev *dev, const struct 
pci_device_id *id)
}
 
info = >info;
-   info->flags = FBINFO_DEFAULT | FBINFO_PARTIAL_PAN_OK;
+   info->flags = FBINFO_PARTIAL_PAN_OK;
 
err = vmlfb_enable_mmio(par);
if (err)
diff --git a/drivers/video/fbdev/vt8500lcdfb.c 
b/drivers/video/fbdev/vt8500lcdfb.c
index 31d4e85b220c..42d39a9d5130 100644
--- a/drivers/video/fbdev/vt8500lcdfb.c
+++ b/drivers/video/fbdev/vt8500lcdfb.c
@@ -300,8 +300,7 @@ static int vt8500lcd_probe(struct platform_device *pdev)
fbi->fb.var.vmode   = FB_VMODE_NONINTERLACED;
 
fbi->fb.fbops   = _ops;
-   fbi->fb.flags   = FBINFO_DEFAULT
-   | FBINFO_HWACCEL_COPYAREA
+   fbi->fb.flags   = FBINFO_HWACCEL_COPYAREA
| FBINFO_HWACCEL_FILLRECT
| FBINFO_HWACCEL_YPAN
| FBINFO_VIRTFB
-- 
2.41.0



[PATCH 11/17] media: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct
fbinfo.flags has been allocated to zero by kzalloc(). So do not
set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Andy Walls 
Cc: Mauro Carvalho Chehab 
Cc: Hans Verkuil 
---
 drivers/media/pci/ivtv/ivtvfb.c  | 1 -
 drivers/media/test-drivers/vivid/vivid-osd.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/media/pci/ivtv/ivtvfb.c b/drivers/media/pci/ivtv/ivtvfb.c
index 0aeb9daaee4c..23c8c094e791 100644
--- a/drivers/media/pci/ivtv/ivtvfb.c
+++ b/drivers/media/pci/ivtv/ivtvfb.c
@@ -1048,7 +1048,6 @@ static int ivtvfb_init_vidmode(struct ivtv *itv)
/* Generate valid fb_info */
 
oi->ivtvfb_info.node = -1;
-   oi->ivtvfb_info.flags = FBINFO_FLAG_DEFAULT;
oi->ivtvfb_info.par = itv;
oi->ivtvfb_info.var = oi->ivtvfb_defined;
oi->ivtvfb_info.fix = oi->ivtvfb_fix;
diff --git a/drivers/media/test-drivers/vivid/vivid-osd.c 
b/drivers/media/test-drivers/vivid/vivid-osd.c
index ec25edc679b3..051f1805a16d 100644
--- a/drivers/media/test-drivers/vivid/vivid-osd.c
+++ b/drivers/media/test-drivers/vivid/vivid-osd.c
@@ -310,7 +310,6 @@ static int vivid_fb_init_vidmode(struct vivid_dev *dev)
/* Generate valid fb_info */
 
dev->fb_info.node = -1;
-   dev->fb_info.flags = FBINFO_FLAG_DEFAULT;
dev->fb_info.par = dev;
dev->fb_info.var = dev->fb_defined;
dev->fb_info.fix = dev->fb_fix;
-- 
2.41.0



[PATCH 08/17] arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag

2023-07-10 Thread Thomas Zimmermann
FBINFO_FLAG_DEFAULT is a flag for a framebuffer in struct fb_info.
Flags for videomodes are prefixed with FB_MODE_. FBINFO_FLAG_DEFAULT
is 0 and the static declaration already clears the memory area of
sh7763fb_videomode. So remove the assignment.

Signed-off-by: Thomas Zimmermann 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: John Paul Adrian Glaubitz 
---
 arch/sh/boards/mach-sh7763rdp/setup.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/sh/boards/mach-sh7763rdp/setup.c 
b/arch/sh/boards/mach-sh7763rdp/setup.c
index 97e715e4e9b3..345f2b76c85a 100644
--- a/arch/sh/boards/mach-sh7763rdp/setup.c
+++ b/arch/sh/boards/mach-sh7763rdp/setup.c
@@ -119,7 +119,6 @@ static struct fb_videomode sh7763fb_videomode = {
.vsync_len = 1,
.sync = 0,
.vmode = FB_VMODE_NONINTERLACED,
-   .flag = FBINFO_FLAG_DEFAULT,
 };
 
 static struct sh7760fb_platdata sh7763fb_def_pdata = {
-- 
2.41.0



[PATCH 02/17] fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags
has been allocated to zero by a static declaration. So do not set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Helge Deller 
---
 drivers/video/fbdev/68328fb.c  | 2 +-
 drivers/video/fbdev/acornfb.c  | 2 +-
 drivers/video/fbdev/g364fb.c   | 2 +-
 drivers/video/fbdev/hpfb.c | 1 -
 drivers/video/fbdev/macfb.c| 1 -
 drivers/video/fbdev/maxinefb.c | 1 -
 6 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/video/fbdev/68328fb.c b/drivers/video/fbdev/68328fb.c
index 07d6e8dc686b..956dd2399cc0 100644
--- a/drivers/video/fbdev/68328fb.c
+++ b/drivers/video/fbdev/68328fb.c
@@ -448,7 +448,7 @@ static int __init mc68x328fb_init(void)
fb_info.var.red.offset = fb_info.var.green.offset = 
fb_info.var.blue.offset = 0;
}
fb_info.pseudo_palette = _pseudo_palette;
-   fb_info.flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
+   fb_info.flags = FBINFO_HWACCEL_YPAN;
 
if (fb_alloc_cmap(_info.cmap, 256, 0))
return -ENOMEM;
diff --git a/drivers/video/fbdev/acornfb.c b/drivers/video/fbdev/acornfb.c
index 1b72edc01cfb..8fec21dfca09 100644
--- a/drivers/video/fbdev/acornfb.c
+++ b/drivers/video/fbdev/acornfb.c
@@ -694,7 +694,7 @@ static void acornfb_init_fbinfo(void)
first = 0;
 
fb_info.fbops   = _ops;
-   fb_info.flags   = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
+   fb_info.flags   = FBINFO_HWACCEL_YPAN;
fb_info.pseudo_palette  = current_par.pseudo_palette;
 
strcpy(fb_info.fix.id, "Acorn");
diff --git a/drivers/video/fbdev/g364fb.c b/drivers/video/fbdev/g364fb.c
index c5b7673ddc6c..0825cbde116e 100644
--- a/drivers/video/fbdev/g364fb.c
+++ b/drivers/video/fbdev/g364fb.c
@@ -219,7 +219,7 @@ int __init g364fb_init(void)
fb_info.screen_base = (char *) G364_MEM_BASE;   /* virtual kernel 
address */
fb_info.var = fb_var;
fb_info.fix = fb_fix;
-   fb_info.flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
+   fb_info.flags = FBINFO_HWACCEL_YPAN;
 
fb_alloc_cmap(_info.cmap, 255, 0);
 
diff --git a/drivers/video/fbdev/hpfb.c b/drivers/video/fbdev/hpfb.c
index 77fbff47b1a8..406c1383cbda 100644
--- a/drivers/video/fbdev/hpfb.c
+++ b/drivers/video/fbdev/hpfb.c
@@ -287,7 +287,6 @@ static int hpfb_init_one(unsigned long phys_base, unsigned 
long virt_base)
else
strcat(fb_info.fix.id, "Catseye");
fb_info.fbops = _ops;
-   fb_info.flags = FBINFO_DEFAULT;
fb_info.var   = hpfb_defined;
fb_info.screen_base = (char *)fb_start;
 
diff --git a/drivers/video/fbdev/macfb.c b/drivers/video/fbdev/macfb.c
index 44ff860a3f37..5ca208d992cc 100644
--- a/drivers/video/fbdev/macfb.c
+++ b/drivers/video/fbdev/macfb.c
@@ -876,7 +876,6 @@ static int __init macfb_init(void)
fb_info.var = macfb_defined;
fb_info.fix = macfb_fix;
fb_info.pseudo_palette  = pseudo_palette;
-   fb_info.flags   = FBINFO_DEFAULT;
 
err = fb_alloc_cmap(_info.cmap, video_cmap_len, 0);
if (err)
diff --git a/drivers/video/fbdev/maxinefb.c b/drivers/video/fbdev/maxinefb.c
index 4e6b05232ae2..0ac1873b2acb 100644
--- a/drivers/video/fbdev/maxinefb.c
+++ b/drivers/video/fbdev/maxinefb.c
@@ -155,7 +155,6 @@ int __init maxinefb_init(void)
fb_info.screen_base = (char *)maxinefb_fix.smem_start;
fb_info.var = maxinefb_defined;
fb_info.fix = maxinefb_fix;
-   fb_info.flags = FBINFO_DEFAULT;
 
fb_alloc_cmap(_info.cmap, 256, 0);
 
-- 
2.41.0



[PATCH 01/17] drm: Remove flag FBINFO_DEFAULT from fbdev emulation

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags
has been allocated to zero by framebuffer_alloc(). So do not set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Patrik Jakobsson 
Cc: Alex Deucher 
Cc: "Christian König" 
Cc: "Pan, Xinhui" 
---
 drivers/gpu/drm/drm_fbdev_dma.c   | 1 -
 drivers/gpu/drm/drm_fbdev_generic.c   | 1 -
 drivers/gpu/drm/gma500/fbdev.c| 2 +-
 drivers/gpu/drm/radeon/radeon_fbdev.c | 2 +-
 4 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c
index 8217f1ddc007..bc5fdb1da6a3 100644
--- a/drivers/gpu/drm/drm_fbdev_dma.c
+++ b/drivers/gpu/drm/drm_fbdev_dma.c
@@ -123,7 +123,6 @@ static int drm_fbdev_dma_helper_fb_probe(struct 
drm_fb_helper *fb_helper,
drm_fb_helper_fill_info(info, fb_helper, sizes);
 
info->fbops = _fbdev_dma_fb_ops;
-   info->flags = FBINFO_DEFAULT;
 
/* screen */
info->flags |= FBINFO_VIRTFB; /* system memory */
diff --git a/drivers/gpu/drm/drm_fbdev_generic.c 
b/drivers/gpu/drm/drm_fbdev_generic.c
index 98ae703848a0..8a5600b33e10 100644
--- a/drivers/gpu/drm/drm_fbdev_generic.c
+++ b/drivers/gpu/drm/drm_fbdev_generic.c
@@ -109,7 +109,6 @@ static int drm_fbdev_generic_helper_fb_probe(struct 
drm_fb_helper *fb_helper,
drm_fb_helper_fill_info(info, fb_helper, sizes);
 
info->fbops = _fbdev_generic_fb_ops;
-   info->flags = FBINFO_DEFAULT;
 
/* screen */
info->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST;
diff --git a/drivers/gpu/drm/gma500/fbdev.c b/drivers/gpu/drm/gma500/fbdev.c
index 955cbe9f05a7..b09a3ef770d4 100644
--- a/drivers/gpu/drm/gma500/fbdev.c
+++ b/drivers/gpu/drm/gma500/fbdev.c
@@ -215,7 +215,7 @@ static int psb_fbdev_fb_probe(struct drm_fb_helper 
*fb_helper,
}
 
info->fbops = _fbdev_fb_ops;
-   info->flags = FBINFO_DEFAULT;
+
/* Accessed stolen memory directly */
info->screen_base = dev_priv->vram_addr + backing->offset;
info->screen_size = size;
diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c 
b/drivers/gpu/drm/radeon/radeon_fbdev.c
index ab9c1abbac97..c632ca03032b 100644
--- a/drivers/gpu/drm/radeon/radeon_fbdev.c
+++ b/drivers/gpu/drm/radeon/radeon_fbdev.c
@@ -253,7 +253,7 @@ static int radeon_fbdev_fb_helper_fb_probe(struct 
drm_fb_helper *fb_helper,
}
 
info->fbops = _fbdev_fb_ops;
-   info->flags = FBINFO_DEFAULT;
+
/* radeon resume is fragile and needs a vt switch to help it along */
info->skip_vt_switch = false;
 
-- 
2.41.0



[PATCH 12/17] staging: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_FLAG_DEFAULT is 0 and has no effect, as struct
fbinfo.flags has been allocated to zero by framebuffer_alloc(). So do
not set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_FLAG_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Greg Kroah-Hartman 
Cc: Sudip Mukherjee 
Cc: Teddy Wang 
---
 drivers/staging/fbtft/fbtft-core.c | 2 +-
 drivers/staging/sm750fb/sm750.c| 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/fbtft/fbtft-core.c 
b/drivers/staging/fbtft/fbtft-core.c
index 3a4abf3bae40..eac1d570f437 100644
--- a/drivers/staging/fbtft/fbtft-core.c
+++ b/drivers/staging/fbtft/fbtft-core.c
@@ -684,7 +684,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct 
fbtft_display *display,
info->var.transp.offset =  0;
info->var.transp.length =  0;
 
-   info->flags =  FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB;
+   info->flags =  FBINFO_VIRTFB;
 
par = info->par;
par->info = info;
diff --git a/drivers/staging/sm750fb/sm750.c b/drivers/staging/sm750fb/sm750.c
index c260f73cf570..79bcd5bd4938 100644
--- a/drivers/staging/sm750fb/sm750.c
+++ b/drivers/staging/sm750fb/sm750.c
@@ -807,7 +807,6 @@ static int lynxfb_set_fbinfo(struct fb_info *info, int 
index)
info->screen_base = crtc->v_screen;
pr_debug("screen_base vaddr = %p\n", info->screen_base);
info->screen_size = line_length * var->yres_virtual;
-   info->flags = FBINFO_FLAG_DEFAULT | 0;
 
/* set info->fix */
fix->type = FB_TYPE_PACKED_PIXELS;
-- 
2.41.0



[PATCH 04/17] fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags
has been allocated to zero by devm_kzalloc(). So do not set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Helge Deller 
---
 drivers/video/fbdev/pxafb.c| 1 -
 drivers/video/fbdev/sa1100fb.c | 1 -
 drivers/video/fbdev/wm8505fb.c | 3 +--
 drivers/video/fbdev/xilinxfb.c | 1 -
 4 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c
index 2a8b1dea3a67..c8c4677d06b4 100644
--- a/drivers/video/fbdev/pxafb.c
+++ b/drivers/video/fbdev/pxafb.c
@@ -1826,7 +1826,6 @@ static struct pxafb_info *pxafb_init_fbinfo(struct device 
*dev,
fbi->fb.var.vmode   = FB_VMODE_NONINTERLACED;
 
fbi->fb.fbops   = _ops;
-   fbi->fb.flags   = FBINFO_DEFAULT;
fbi->fb.node= -1;
 
addr = fbi;
diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c
index a2408bf00ca0..3d76ce111488 100644
--- a/drivers/video/fbdev/sa1100fb.c
+++ b/drivers/video/fbdev/sa1100fb.c
@@ -1089,7 +1089,6 @@ static struct sa1100fb_info *sa1100fb_init_fbinfo(struct 
device *dev)
fbi->fb.var.vmode   = FB_VMODE_NONINTERLACED;
 
fbi->fb.fbops   = _ops;
-   fbi->fb.flags   = FBINFO_DEFAULT;
fbi->fb.monspecs= monspecs;
fbi->fb.pseudo_palette  = fbi->pseudo_palette;
 
diff --git a/drivers/video/fbdev/wm8505fb.c b/drivers/video/fbdev/wm8505fb.c
index 10a8b1250103..5833147aa43d 100644
--- a/drivers/video/fbdev/wm8505fb.c
+++ b/drivers/video/fbdev/wm8505fb.c
@@ -285,8 +285,7 @@ static int wm8505fb_probe(struct platform_device *pdev)
fbi->fb.fix.accel   = FB_ACCEL_NONE;
 
fbi->fb.fbops   = _ops;
-   fbi->fb.flags   = FBINFO_DEFAULT
-   | FBINFO_HWACCEL_COPYAREA
+   fbi->fb.flags   = FBINFO_HWACCEL_COPYAREA
| FBINFO_HWACCEL_FILLRECT
| FBINFO_HWACCEL_XPAN
| FBINFO_HWACCEL_YPAN
diff --git a/drivers/video/fbdev/xilinxfb.c b/drivers/video/fbdev/xilinxfb.c
index 2aa3a528277f..768a281a8d2c 100644
--- a/drivers/video/fbdev/xilinxfb.c
+++ b/drivers/video/fbdev/xilinxfb.c
@@ -324,7 +324,6 @@ static int xilinxfb_assign(struct platform_device *pdev,
drvdata->info.fix.line_length = pdata->xvirt * BYTES_PER_PIXEL;
 
drvdata->info.pseudo_palette = drvdata->pseudo_palette;
-   drvdata->info.flags = FBINFO_DEFAULT;
drvdata->info.var = xilinx_fb_var;
drvdata->info.var.height = pdata->screen_height_mm;
drvdata->info.var.width = pdata->screen_width_mm;
-- 
2.41.0



[PATCH 00/17] fbdev: Remove FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT flags

2023-07-10 Thread Thomas Zimmermann
Remove the unused flags FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT from
fbdev and drivers, as briefly discussed at [1]. Both flags were maybe
useful when fbdev had special handling for driver modules. With
commit 376b3ff54c9a ("fbdev: Nuke FBINFO_MODULE"), they are both 0
and have no further effect.

Patches 1 to 7 remove FBINFO_DEFAULT from drivers. Patches 2 to 5
split this by the way the fb_info struct is being allocated. All flags
are cleared to zero during the allocation.

Patches 8 to 16 do the same for FBINFO_FLAG_DEFAULT. Patch 8 fixes
an actual bug in how arch/sh uses the token for struct fb_videomode,
which is unrelated.

Patch 17 removes both flag constants from <linux/fb.h>.

[1] 
https://lore.kernel.org/dri-devel/877crer8fm@minerva.mail-host-address-is-not-set/

Thomas Zimmermann (17):
  drm: Remove flag FBINFO_DEFAULT from fbdev emulation
  fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers
  fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers
  fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers
  fbdev: Remove flag FBINFO_DEFAULT from fbdev drivers
  fbdev/fsl-diu-fb: Remove flag FBINFO_DEFAULT
  vfio-mdev: Remove flag FBINFO_DEFAULT from fbdev sample driver
  arch/sh: Do not assign FBINFO_FLAG_DEFAULT to fb_videomode.flag
  auxdisplay: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
  hid/picolcd: Remove flag FBINFO_FLAG_DEFAULT from fbdev driver
  media: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
  staging: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
  fbdev: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
  fbdev: Remove flag FBINFO_FLAG_DEFAULT from fbdev drivers
  fbdev/atafb: Remove flag FBINFO_FLAG_DEFAULT
  fbdev/pxafb: Remove flag FBINFO_FLAG_DEFAULT
  fbdev: Remove FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT

 arch/sh/boards/mach-sh7763rdp/setup.c  | 1 -
 drivers/auxdisplay/cfag12864bfb.c  | 1 -
 drivers/auxdisplay/ht16k33.c   | 1 -
 drivers/gpu/drm/drm_fbdev_dma.c| 1 -
 drivers/gpu/drm/drm_fbdev_generic.c| 1 -
 drivers/gpu/drm/gma500/fbdev.c | 2 +-
 drivers/gpu/drm/radeon/radeon_fbdev.c  | 2 +-
 drivers/hid/hid-picolcd_fb.c   | 1 -
 drivers/media/pci/ivtv/ivtvfb.c| 1 -
 drivers/media/test-drivers/vivid/vivid-osd.c   | 1 -
 drivers/staging/fbtft/fbtft-core.c | 2 +-
 drivers/staging/sm750fb/sm750.c| 1 -
 drivers/video/fbdev/68328fb.c  | 2 +-
 drivers/video/fbdev/acornfb.c  | 2 +-
 drivers/video/fbdev/amba-clcd.c| 1 -
 drivers/video/fbdev/amifb.c| 5 ++---
 drivers/video/fbdev/arcfb.c| 1 -
 drivers/video/fbdev/asiliantfb.c   | 1 -
 drivers/video/fbdev/atafb.c| 1 -
 drivers/video/fbdev/atmel_lcdfb.c  | 2 +-
 drivers/video/fbdev/aty/aty128fb.c | 1 -
 drivers/video/fbdev/aty/atyfb_base.c   | 3 +--
 drivers/video/fbdev/aty/radeon_base.c  | 3 +--
 drivers/video/fbdev/broadsheetfb.c | 2 +-
 drivers/video/fbdev/bw2.c  | 1 -
 drivers/video/fbdev/carminefb.c| 1 -
 drivers/video/fbdev/cg14.c | 2 +-
 drivers/video/fbdev/cg3.c  | 1 -
 drivers/video/fbdev/cg6.c  | 2 +-
 drivers/video/fbdev/chipsfb.c  | 1 -
 drivers/video/fbdev/cirrusfb.c | 3 +--
 drivers/video/fbdev/clps711x-fb.c  | 1 -
 drivers/video/fbdev/cobalt_lcdfb.c | 1 -
 drivers/video/fbdev/controlfb.c| 2 +-
 drivers/video/fbdev/cyber2000fb.c  | 2 +-
 drivers/video/fbdev/da8xx-fb.c | 1 -
 drivers/video/fbdev/efifb.c| 1 -
 drivers/video/fbdev/ep93xx-fb.c| 1 -
 drivers/video/fbdev/ffb.c  | 3 +--
 drivers/video/fbdev/fm2fb.c| 1 -
 drivers/video/fbdev/fsl-diu-fb.c   | 2 +-
 drivers/video/fbdev/g364fb.c   | 2 +-
 drivers/video/fbdev/gbefb.c| 1 -
 drivers/video/fbdev/geode/gx1fb_core.c | 1 -
 drivers/video/fbdev/geode/gxfb_core.c  | 1 -
 drivers/video/fbdev/geode/lxfb_core.c  | 1 -
 drivers/video/fbdev/goldfishfb.c   | 1 -
 drivers/video/fbdev/grvga.c| 2 +-
 drivers/video/fbdev/gxt4500.c  | 3 +--
 drivers/video/fbdev/hecubafb.c | 2 +-
 drivers/video/fbdev/hgafb.c| 2 +-
 drivers/video/fbdev/hitfb.c| 2 +-
 drivers/video/fbdev/hpfb.c | 1 -
 drivers/video/fbdev/hyperv_fb.c| 2 --
 drivers/video/fbdev/i740fb.c   | 2 +-
 drivers/video/fbdev/i810/i810_main.c   | 4 ++--
 drivers/video/fbdev/imsttfb.c  | 3 +--
 drivers/video/fbdev/imxfb.c| 3 +--
 drivers/video/fbdev/intelfb/intelfbdrv.c   | 5 

[PATCH 06/17] fbdev/fsl-diu-fb: Remove flag FBINFO_DEFAULT

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags
has been allocated to zero by dmam_alloc_coherent(__GFP_ZERO). So do not
set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Timur Tabi 
Cc: Helge Deller 
---
 drivers/video/fbdev/fsl-diu-fb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c
index 785eb8a06943..c62b48f27ba9 100644
--- a/drivers/video/fbdev/fsl-diu-fb.c
+++ b/drivers/video/fbdev/fsl-diu-fb.c
@@ -1476,7 +1476,7 @@ static int install_fb(struct fb_info *info)
 
info->var.activate = FB_ACTIVATE_NOW;
info->fbops = _diu_ops;
-   info->flags = FBINFO_DEFAULT | FBINFO_VIRTFB | FBINFO_PARTIAL_PAN_OK |
+   info->flags = FBINFO_VIRTFB | FBINFO_PARTIAL_PAN_OK |
FBINFO_READS_FAST;
info->pseudo_palette = mfbi->pseudo_palette;
 
-- 
2.41.0



[PATCH 07/17] vfio-mdev: Remove flag FBINFO_DEFAULT from fbdev sample driver

2023-07-10 Thread Thomas Zimmermann
The flag FBINFO_DEFAULT is 0 and has no effect, as struct fbinfo.flags
has been allocated to zero by framebuffer_alloc(). So do not set it.

Flags should signal differences from the default values. After cleaning
up all occurrences of FBINFO_DEFAULT, the token can be removed.

Signed-off-by: Thomas Zimmermann 
Cc: Kirti Wankhede 
---
 samples/vfio-mdev/mdpy-fb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c
index 3c8001b9e407..cda477b28685 100644
--- a/samples/vfio-mdev/mdpy-fb.c
+++ b/samples/vfio-mdev/mdpy-fb.c
@@ -162,7 +162,6 @@ static int mdpy_fb_probe(struct pci_dev *pdev,
}
 
info->fbops = _fb_ops;
-   info->flags = FBINFO_DEFAULT;
info->pseudo_palette = par->palette;
 
ret = register_framebuffer(info);
-- 
2.41.0



[PATCH v2 10/10] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document affinity_domain_via_partition sysfs interface file

2023-07-10 Thread Kajol Jain
Add details of the new hv-gpci interface file called
"affinity_domain_via_partition" in the ABI documentation.

Signed-off-by: Kajol Jain 
---
 .../sysfs-bus-event_source-devices-hv_gpci| 32 +++
 1 file changed, 32 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci 
b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
index d8e65b93d1f7..b03b2bd4b081 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
@@ -208,3 +208,35 @@ Description:   admin read only
   more information.
 
* "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What:  /sys/devices/hv_gpci/interface/affinity_domain_via_partition
+Date:  July 2023
+Contact:   Linux on PowerPC Developer List 
+Description:   admin read only
+   This sysfs file exposes the system topology information by 
making HCALL
+   H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request 
value
+   AFFINITY_DOMAIN_INFORMATION_BY_PARTITION(0xB1).
+
+   * This sysfs file will be created only for power10 and above 
platforms.
+
+   * User needs root privileges to read data from this sysfs file.
+
+   * This sysfs file will be created, only when the HCALL returns 
"H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY", can be resolved 
during
+ runtime by setting "Enable Performance Information 
Collection" option.
+
+   * The end user reading this sysfs file must decode the content 
as per
+ underlying platform/firmware.
+
+   Possible error codes while reading this sysfs file:
+
+   * "-EPERM" : Partition is not permitted to retrieve performance 
information,
+   required to set "Enable Performance Information 
Collection" option.
+
+   * "-EIO" : Can't retrieve system information because of invalid 
buffer length/invalid address
+  or because of some hardware error. Refer 
getPerfCountInfo documentation for
+  more information.
+
+   * "-EFBIG" : System information exceeds PAGE_SIZE.
-- 
2.31.1



[PATCH v2 09/10] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via partition information

2023-07-10 Thread Kajol Jain
The hcall H_GET_PERF_COUNTER_INFO with counter request value as
AFFINITY_DOMAIN_INFORMATION_BY_PARTITION(0XB1), can be used to get
the system affinity domain via partition information. To expose the system
affinity domain via partition information, patch adds sysfs file called
"affinity_domain_via_partition" to the "/sys/devices/hv_gpci/interface/"
of hv_gpci pmu driver.

Add new entry for AFFINITY_DOMAIN_VIA_PAR in sysinfo_counter_request
array, which points to the counter request value
"affinity_domain_via_partition" in hv-gpci.c file. Also add a
new function called "affinity_domain_via_partition_result_parse" to parse
the hcall result and store it in output buffer.

The affinity_domain_via_partition sysfs file is only available for power10
and above platforms. Add a macro called
INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR, which points to the index of NULL
placeholder, for affinity_domain_via_partition attribute in
interface_attrs array. Also updated the value of INTERFACE_NULL_ATTR
macro in hv-gpci.c file.

Signed-off-by: Kajol Jain 
---
 arch/powerpc/perf/hv-gpci.c | 160 +++-
 1 file changed, 159 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 326b758df7c8..f2fff166290b 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -107,7 +107,8 @@ static ssize_t cpumask_show(struct device *dev,
 #define INTERFACE_PROCESSOR_CONFIG_ATTR7
 #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR  8
 #define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9
-#define INTERFACE_NULL_ATTR10
+#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR 10
+#define INTERFACE_NULL_ATTR11
 
 /* Counter request value to retrieve system information */
 enum {
@@ -115,6 +116,7 @@ enum {
PROCESSOR_CONFIG,
AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */
AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */
+   AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */
 };
 
 static int sysinfo_counter_request[] = {
@@ -122,6 +124,7 @@ static int sysinfo_counter_request[] = {
[PROCESSOR_CONFIG] = 0x90,
[AFFINITY_DOMAIN_VIA_VP] = 0xA0,
[AFFINITY_DOMAIN_VIA_DOM] = 0xB0,
+   [AFFINITY_DOMAIN_VIA_PAR] = 0xB1,
 };
 
 static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) 
__aligned(sizeof(uint64_t));
@@ -458,6 +461,152 @@ static ssize_t affinity_domain_via_domain_show(struct 
device *dev, struct device
return ret;
 }
 
+static void affinity_domain_via_partition_result_parse(int returned_values,
+   int element_size, char *buf, size_t *last_element,
+   size_t *n, struct hv_gpci_request_buffer *arg)
+{
+   size_t i = 0, j = 0;
+   size_t k, l, m;
+   uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele;
+
+   /*
+* hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
+* to show the total number of counter_value array elements
+* returned via hcall.
+* Unlike other request types, the data structure returned by this
+* request is variable-size. For this counter request type,
+* hcall populates 'cv_element_size' with the minimum size of
+* the structure returned, i.e. the size of the structure with no domain
+* information. The loop below goes through the whole counter_value array
+* to determine the number and size of each domain array element and
+* adds them to the output buffer.
+*/
+   while (i < returned_values) {
+   k = j;
+   for (; k < j + element_size; k++)
+   *n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]);
+   *n += sprintf(buf + *n,  "\n");
+
+   total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | 
(u8)arg->bytes[k - 3];
+   size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | 
(u8)arg->bytes[k - 1];
+
+   for (l = 0; l < total_affinity_domain_ele; l++) {
+   for (m = 0; m < size_of_each_affinity_domain_ele; m++) {
+   *n += sprintf(buf + *n,  "%02x", 
(u8)arg->bytes[k]);
+   k++;
+   }
+   *n += sprintf(buf + *n,  "\n");
+   }
+
+   *n += sprintf(buf + *n,  "\n");
+   i++;
+   j = k;
+   }
+
+   *last_element = k;
+}
+
+static ssize_t affinity_domain_via_partition_show(struct device *dev, struct 
device_attribute *attr,
+   char *buf)
+{
+   struct hv_gpci_request_buffer *arg;
+   unsigned long ret;
+   size_t n = 0;
+   size_t last_element = 0;
+   u32 starting_index;
+
+   arg = (void *)get_cpu_var(hv_gpci_reqb);
+   memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+   /*
+   

[PATCH v2 08/10] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document affinity_domain_via_domain sysfs interface file

2023-07-10 Thread Kajol Jain
Add details of the new hv-gpci interface file called
"affinity_domain_via_domain" in the ABI documentation.

Signed-off-by: Kajol Jain 
---
 .../sysfs-bus-event_source-devices-hv_gpci| 32 +++
 1 file changed, 32 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci 
b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
index 3b63d66658fe..d8e65b93d1f7 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
@@ -176,3 +176,35 @@ Description:   admin read only
   more information.
 
* "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What:  /sys/devices/hv_gpci/interface/affinity_domain_via_domain
+Date:  July 2023
+Contact:   Linux on PowerPC Developer List 
+Description:   admin read only
+   This sysfs file exposes the system topology information by 
making HCALL
+   H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request 
value
+   AFFINITY_DOMAIN_INFORMATION_BY_DOMAIN(0xB0).
+
+   * This sysfs file will be created only for power10 and above 
platforms.
+
+   * User needs root privileges to read data from this sysfs file.
+
+   * This sysfs file will be created, only when the HCALL returns 
"H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY", can be resolved 
during
+ runtime by setting "Enable Performance Information 
Collection" option.
+
+   * The end user reading this sysfs file must decode the content 
as per
+ underlying platform/firmware.
+
+   Possible error codes while reading this sysfs file:
+
+   * "-EPERM" : Partition is not permitted to retrieve performance 
information,
+   required to set "Enable Performance Information 
Collection" option.
+
+   * "-EIO" : Can't retrieve system information because of invalid 
buffer length/invalid address
+  or because of some hardware error. Refer 
getPerfCountInfo documentation for
+  more information.
+
+   * "-EFBIG" : System information exceeds PAGE_SIZE.
-- 
2.31.1



[PATCH v2 07/10] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via domain information

2023-07-10 Thread Kajol Jain
The hcall H_GET_PERF_COUNTER_INFO with counter request value as
AFFINITY_DOMAIN_INFORMATION_BY_DOMAIN(0XB0), can be used to get
the system affinity domain via domain information. To expose the system
affinity domain via domain information, patch adds sysfs file called
"affinity_domain_via_domain" to the "/sys/devices/hv_gpci/interface/"
of hv_gpci pmu driver.

Add new entry for AFFINITY_DOMAIN_VIA_DOM in sysinfo_counter_request
array, which points to the counter request value
"affinity_domain_via_domain" in hv-gpci.c file.

The affinity_domain_via_domain sysfs file is only available for power10
and above platforms. Add a macro called
INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR, which points to the index of NULL
placeholder, for affinity_domain_via_domain attribute in interface_attrs
array. Also updated the value of INTERFACE_NULL_ATTR macro in hv-gpci.c
file.

Signed-off-by: Kajol Jain 
---
 arch/powerpc/perf/hv-gpci.c | 80 -
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 68502cb18262..326b758df7c8 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -106,19 +106,22 @@ static ssize_t cpumask_show(struct device *dev,
 #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR  6
 #define INTERFACE_PROCESSOR_CONFIG_ATTR7
 #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR  8
-#define INTERFACE_NULL_ATTR9
+#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9
+#define INTERFACE_NULL_ATTR10
 
 /* Counter request value to retrieve system information */
 enum {
PROCESSOR_BUS_TOPOLOGY,
PROCESSOR_CONFIG,
AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */
+   AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */
 };
 
 static int sysinfo_counter_request[] = {
[PROCESSOR_BUS_TOPOLOGY] = 0xD0,
[PROCESSOR_CONFIG] = 0x90,
[AFFINITY_DOMAIN_VIA_VP] = 0xA0,
+   [AFFINITY_DOMAIN_VIA_DOM] = 0xB0,
 };
 
 static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) 
__aligned(sizeof(uint64_t));
@@ -389,6 +392,72 @@ static ssize_t 
affinity_domain_via_virtual_processor_show(struct device *dev,
return ret;
 }
 
+static ssize_t affinity_domain_via_domain_show(struct device *dev, struct 
device_attribute *attr,
+   char *buf)
+{
+   struct hv_gpci_request_buffer *arg;
+   unsigned long ret;
+   size_t n = 0;
+
+   arg = (void *)get_cpu_var(hv_gpci_reqb);
+   memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+   /*
+* Pass the counter request 0xB0 corresponds to request
+* type 'Affinity_domain_information_by_domain',
+* to retrieve the system affinity domain information.
+* starting_index value refers to the starting hardware
+* processor index.
+*/
+   ret = 
systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+   0, 0, buf, , arg);
+
+   if (!ret)
+   return n;
+
+   if (ret != H_PARAMETER)
+   goto out;
+
+   /*
+* ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+* implies that buffer can't accommodate all information, and a partial 
buffer
+* returned. To handle that, we need to take subsequent requests
+* with next starting index to retrieve additional (missing) data.
+* Below loop do subsequent hcalls with next starting index and add it
+* to buffer until we get all the information.
+*/
+   while (ret == H_PARAMETER) {
+   int returned_values = be16_to_cpu(arg->params.returned_values);
+   int elementsize = be16_to_cpu(arg->params.cv_element_size);
+   int last_element = (returned_values - 1) * elementsize;
+
+   /*
+* Since the starting index value is part of counter_value
+* buffer elements, use the starting index value in the last
+* element and add 1 to make subsequent hcalls.
+*/
+   u32 starting_index = arg->bytes[last_element + 1] +
+   (arg->bytes[last_element] << 8) + 1;
+
+   memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+   ret = 
systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+   starting_index, 0, buf, , arg);
+
+   if (!ret)
+   return n;
+
+   if (ret != H_PARAMETER)
+   goto out;
+   }
+
+   return n;
+
+out:
+   put_cpu_var(hv_gpci_reqb);
+   return ret;
+}
+
 static DEVICE_ATTR_RO(kernel_version);
 static DEVICE_ATTR_RO(cpumask);
 
@@ -420,6 +489,11 @@ static struct attribute *interface_attrs[] = {
 * attribute, set in init function if applicable.
 */
NULL,
+   

[PATCH v2 06/10] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document affinity_domain_via_virtual_processor sysfs interface file

2023-07-10 Thread Kajol Jain
Add details of the new hv-gpci interface file called
"affinity_domain_via_virtual_processor" in the ABI documentation.

Signed-off-by: Kajol Jain 
---
 .../sysfs-bus-event_source-devices-hv_gpci| 32 +++
 1 file changed, 32 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci 
b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
index aff52dc3b05c..3b63d66658fe 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
@@ -144,3 +144,35 @@ Description:   admin read only
   more information.
 
* "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What:  
/sys/devices/hv_gpci/interface/affinity_domain_via_virtual_processor
+Date:  July 2023
+Contact:   Linux on PowerPC Developer List 
+Description:   admin read only
+   This sysfs file exposes the system topology information by 
making HCALL
+   H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request 
value
+   AFFINITY_DOMAIN_INFORMATION_BY_VIRTUAL_PROCESSOR(0xA0).
+
+   * This sysfs file will be created only for power10 and above 
platforms.
+
+   * User needs root privileges to read data from this sysfs file.
+
+   * This sysfs file will be created, only when the HCALL returns 
"H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY", can be resolved 
during
+ runtime by setting "Enable Performance Information 
Collection" option.
+
+   * The end user reading this sysfs file must decode the content 
as per
+ underlying platform/firmware.
+
+   Possible error codes while reading this sysfs file:
+
+   * "-EPERM" : Partition is not permitted to retrieve performance 
information,
+   required to set "Enable Performance Information 
Collection" option.
+
+   * "-EIO" : Can't retrieve system information because of invalid 
buffer length/invalid address
+  or because of some hardware error. Refer 
getPerfCountInfo documentation for
+  more information.
+
+   * "-EFBIG" : System information exceeds PAGE_SIZE.
-- 
2.31.1



[PATCH v2 05/10] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via virtual processor information

2023-07-10 Thread Kajol Jain
The hcall H_GET_PERF_COUNTER_INFO with counter request value as
AFFINITY_DOMAIN_INFORMATION_BY_VIRTUAL_PROCESSOR(0XA0), can be used to get
the system affinity domain via virtual processor information. To expose
the system affinity domain via virtual processor information, patch adds
sysfs file called "affinity_domain_via_virtual_processor" to the
"/sys/devices/hv_gpci/interface/" of hv_gpci pmu driver.

The affinity_domain_via_virtual_processor sysfs file is only available for
power10 and above platforms. Add a macro called
INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR, which points to the index of NULL
placeholder, for affinity_domain_via_virtual_processor attribute in
interface_attrs array. Also updated the value of INTERFACE_NULL_ATTR macro
in hv-gpci.c file.

Signed-off-by: Kajol Jain 
---
 arch/powerpc/perf/hv-gpci.c | 86 -
 1 file changed, 84 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index c74076d3c7a7..68502cb18262 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -105,17 +105,20 @@ static ssize_t cpumask_show(struct device *dev,
 /* Interface attribute array index to store system information */
 #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR  6
 #define INTERFACE_PROCESSOR_CONFIG_ATTR7
-#define INTERFACE_NULL_ATTR8
+#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR  8
+#define INTERFACE_NULL_ATTR9
 
 /* Counter request value to retrieve system information */
 enum {
PROCESSOR_BUS_TOPOLOGY,
-   PROCESSOR_CONFIG
+   PROCESSOR_CONFIG,
+   AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */
 };
 
 static int sysinfo_counter_request[] = {
[PROCESSOR_BUS_TOPOLOGY] = 0xD0,
[PROCESSOR_CONFIG] = 0x90,
+   [AFFINITY_DOMAIN_VIA_VP] = 0xA0,
 };
 
 static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) 
__aligned(sizeof(uint64_t));
@@ -316,6 +319,76 @@ static ssize_t processor_config_show(struct device *dev, 
struct device_attribute
return ret;
 }
 
+static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   struct hv_gpci_request_buffer *arg;
+   unsigned long ret;
+   size_t n = 0;
+
+   arg = (void *)get_cpu_var(hv_gpci_reqb);
+   memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+   /*
+* Pass the counter request 0xA0 corresponds to request
+* type 'Affinity_domain_information_by_virtual_processor',
+* to retrieve the system affinity domain information.
+* starting_index value refers to the starting hardware
+* processor index.
+*/
+   ret = 
systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+   0, 0, buf, , arg);
+
+   if (!ret)
+   return n;
+
+   if (ret != H_PARAMETER)
+   goto out;
+
+   /*
+* ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+* implies that buffer can't accommodate all information, and a partial 
buffer
+* returned. To handle that, we need to take subsequent requests
+* with next secondary index to retrieve additional (missing) data.
+* Below loop do subsequent hcalls with next secondary index and add it
+* to buffer until we get all the information.
+*/
+   while (ret == H_PARAMETER) {
+   int returned_values = be16_to_cpu(arg->params.returned_values);
+   int elementsize = be16_to_cpu(arg->params.cv_element_size);
+   int last_element = (returned_values - 1) * elementsize;
+
+   /*
+* Since the starting index and secondary index type is part of 
the
+* counter_value buffer elements, use the starting index value 
in the
+* last array element as subsequent starting index, and use 
secondary index
+* value in the last array element plus 1 as subsequent 
secondary index.
+* For counter request '0xA0', starting index points to 
partition id
+* and secondary index points to corresponding virtual 
processor index.
+*/
+   u32 starting_index = arg->bytes[last_element + 1] + 
(arg->bytes[last_element] << 8);
+   u16 secondary_index = arg->bytes[last_element + 3] +
+   (arg->bytes[last_element + 2] << 8) + 1;
+
+   memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+   ret = 
systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+   starting_index, secondary_index, buf, , arg);
+
+   if (!ret)
+   return n;
+
+   if (ret != H_PARAMETER)
+   goto out;
+   }
+
+   return n;
+
+out:
+   

[PATCH v2 04/10] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document processor_config sysfs interface file

2023-07-10 Thread Kajol Jain
Add details of the new hv-gpci interface file called
"processor_config" in the ABI documentation.

Signed-off-by: Kajol Jain 
---
 .../sysfs-bus-event_source-devices-hv_gpci| 32 +++
 1 file changed, 32 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci 
b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
index 2eeeab9a20fa..aff52dc3b05c 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
@@ -112,3 +112,35 @@ Description:   admin read only
   more information.
 
* "-EFBIG" : System information exceeds PAGE_SIZE.
+
+What:  /sys/devices/hv_gpci/interface/processor_config
+Date:  July 2023
+Contact:   Linux on PowerPC Developer List 
+Description:   admin read only
+   This sysfs file exposes the system topology information by 
making HCALL
+   H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request 
value
+   PROCESSOR_CONFIG(0x90).
+
+   * This sysfs file will be created only for power10 and above 
platforms.
+
+   * User needs root privileges to read data from this sysfs file.
+
+   * This sysfs file will be created, only when the HCALL returns 
"H_SUCCESS",
+ "H_AUTHORITY" or "H_PARAMETER" as the return type.
+
+ HCALL with return error type "H_AUTHORITY", can be resolved 
during
+ runtime by setting "Enable Performance Information 
Collection" option.
+
+   * The end user reading this sysfs file must decode the content 
as per
+ underlying platform/firmware.
+
+   Possible error codes while reading this sysfs file:
+
+   * "-EPERM" : Partition is not permitted to retrieve performance 
information,
+   required to set "Enable Performance Information 
Collection" option.
+
+   * "-EIO" : Can't retrieve system information because of invalid 
buffer length/invalid address
+  or because of some hardware error. Refer 
getPerfCountInfo documentation for
+  more information.
+
+   * "-EFBIG" : System information exceeds PAGE_SIZE.
-- 
2.31.1



[PATCH v2 03/10] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show processor config information

2023-07-10 Thread Kajol Jain
The hcall H_GET_PERF_COUNTER_INFO with counter request value as
PROCESSOR_CONFIG(0X90), can be used to get the system
processor configuration information. To expose the system
processor config information, patch adds sysfs file called
"processor_config" to the "/sys/devices/hv_gpci/interface/"
of hv_gpci pmu driver.

Add enum and sysinfo_counter_request array to get required
counter request value in hv-gpci.c file.
Also add a new function called "sysinfo_device_attr_create",
which will create and return required device attribute to the
add_sysinfo_interface_files function.

The processor_config sysfs file is only available for power10
and above platforms. Add a new macro called
INTERFACE_PROCESSOR_CONFIG_ATTR, which points to the index of
NULL placeholder, for processor_config attribute in the interface_attrs
array. Also add macro INTERFACE_NULL_ATTR which points to index of NULL
attribute in interface_attrs array.

Signed-off-by: Kajol Jain 
---
 arch/powerpc/perf/hv-gpci.c | 168 
 1 file changed, 153 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 225f148f75fd..c74076d3c7a7 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -102,11 +102,21 @@ static ssize_t cpumask_show(struct device *dev,
return cpumap_print_to_pagebuf(true, buf, _gpci_cpumask);
 }
 
-/* Counter request value to retrieve system information */
-#define PROCESSOR_BUS_TOPOLOGY 0XD0
-
 /* Interface attribute array index to store system information */
 #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR  6
+#define INTERFACE_PROCESSOR_CONFIG_ATTR7
+#define INTERFACE_NULL_ATTR8
+
+/* Counter request value to retrieve system information */
+enum {
+   PROCESSOR_BUS_TOPOLOGY,
+   PROCESSOR_CONFIG
+};
+
+static int sysinfo_counter_request[] = {
+   [PROCESSOR_BUS_TOPOLOGY] = 0xD0,
+   [PROCESSOR_CONFIG] = 0x90,
+};
 
 static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) 
__aligned(sizeof(uint64_t));
 
@@ -187,7 +197,8 @@ static ssize_t processor_bus_topology_show(struct device 
*dev, struct device_att
 * starting_index value implies the starting hardware
 * chip id.
 */
-   ret = systeminfo_gpci_request(PROCESSOR_BUS_TOPOLOGY, 0, 0, buf, , 
arg);
+   ret = 
systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+   0, 0, buf, , arg);
 
if (!ret)
return n;
@@ -220,8 +231,76 @@ static ssize_t processor_bus_topology_show(struct device 
*dev, struct device_att
 
memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
 
-   ret = systeminfo_gpci_request(PROCESSOR_BUS_TOPOLOGY, 
starting_index,
-   0, buf, , arg);
+   ret = 
systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+   starting_index, 0, buf, , arg);
+
+   if (!ret)
+   return n;
+
+   if (ret != H_PARAMETER)
+   goto out;
+   }
+
+   return n;
+
+out:
+   put_cpu_var(hv_gpci_reqb);
+   return ret;
+}
+
+static ssize_t processor_config_show(struct device *dev, struct 
device_attribute *attr,
+   char *buf)
+{
+   struct hv_gpci_request_buffer *arg;
+   unsigned long ret;
+   size_t n = 0;
+
+   arg = (void *)get_cpu_var(hv_gpci_reqb);
+   memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+   /*
+* Pass the counter request value 0x90 corresponds to request
+* type 'Processor_config', to retrieve
+* the system processor information.
+* starting_index value implies the starting hardware
+* processor index.
+*/
+   ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
+   0, 0, buf, , arg);
+
+   if (!ret)
+   return n;
+
+   if (ret != H_PARAMETER)
+   goto out;
+
+   /*
+* ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+* implies that buffer can't accommodate all information, and a partial 
buffer
+* returned. To handle that, we need to take subsequent requests
+* with next starting index to retrieve additional (missing) data.
+* Below loop do subsequent hcalls with next starting index and add it
+* to buffer until we get all the information.
+*/
+   while (ret == H_PARAMETER) {
+   int returned_values = be16_to_cpu(arg->params.returned_values);
+   int elementsize = be16_to_cpu(arg->params.cv_element_size);
+   int last_element = (returned_values - 1) * elementsize;
+
+   /*
+* Since the starting index is part of counter_value
+* buffer elements, use the starting index value in the last
+

  1   2   >