Re: [PATCH 05/11] swiotlb: pass a gfp_mask argument to swiotlb_init_late

2022-02-24 Thread Anshuman Khandual



On 2/22/22 9:05 PM, Christoph Hellwig wrote:
> Let the caller choose a zone to allocate from.

This is being used later via xen_swiotlb_gfp() on arm platform.

> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/x86/pci/sta2x11-fixup.c | 2 +-
>  include/linux/swiotlb.h  | 2 +-
>  kernel/dma/swiotlb.c | 4 ++--
>  3 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
> index e0c039a75b2db..c7e6faf59a861 100644
> --- a/arch/x86/pci/sta2x11-fixup.c
> +++ b/arch/x86/pci/sta2x11-fixup.c
> @@ -57,7 +57,7 @@ static void sta2x11_new_instance(struct pci_dev *pdev)
>   int size = STA2X11_SWIOTLB_SIZE;
>   /* First instance: register your own swiotlb area */
>   dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size);
> - if (swiotlb_init_late(size))
> + if (swiotlb_init_late(size, GFP_DMA))
>   dev_emerg(&pdev->dev, "init swiotlb failed\n");
>   }
>   list_add(&instance->list, &sta2x11_instance_list);
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index b48b26bfa0edb..1befd6b2ccf5e 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -40,7 +40,7 @@ extern void swiotlb_init(int verbose);
>  int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
>  unsigned long swiotlb_size_or_default(void);
>  extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
> -int swiotlb_init_late(size_t size);
> +int swiotlb_init_late(size_t size, gfp_t gfp_mask);
>  extern void __init swiotlb_update_mem_attributes(void);
>  
>  phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 5f64b02fbb732..a653fcf1fe6c2 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -290,7 +290,7 @@ swiotlb_init(int verbose)
>   * initialize the swiotlb later using the slab allocator if needed.
>   * This should be just like above, but with some error catching.
>   */
> -int swiotlb_init_late(size_t size)
> +int swiotlb_init_late(size_t size, gfp_t gfp_mask)
>  {
>   unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
>   unsigned long bytes;
> @@ -309,7 +309,7 @@ int swiotlb_init_late(size_t size)
>   bytes = nslabs << IO_TLB_SHIFT;
>  
>   while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
> - vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
> +     vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
> order);
>   if (vstart)
>   break;
> 

Reviewed-by: Anshuman Khandual 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 04/11] swiotlb: rename swiotlb_late_init_with_default_size

2022-02-24 Thread Anshuman Khandual



On 2/22/22 9:05 PM, Christoph Hellwig wrote:
> swiotlb_late_init_with_default_size is an overly verbose name that
> doesn't even catch what the function is doing, given that the size is
> not just a default but the actual requested size.
> 
> Rename it to swiotlb_init_late.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/x86/pci/sta2x11-fixup.c | 2 +-
>  include/linux/swiotlb.h      | 2 +-
>  kernel/dma/swiotlb.c         | 6 ++----
>  3 files changed, 4 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
> index 101081ad64b6d..e0c039a75b2db 100644
> --- a/arch/x86/pci/sta2x11-fixup.c
> +++ b/arch/x86/pci/sta2x11-fixup.c
> @@ -57,7 +57,7 @@ static void sta2x11_new_instance(struct pci_dev *pdev)
>   int size = STA2X11_SWIOTLB_SIZE;
>   /* First instance: register your own swiotlb area */
>   dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size);
> - if (swiotlb_late_init_with_default_size(size))
> + if (swiotlb_init_late(size))
>   dev_emerg(&pdev->dev, "init swiotlb failed\n");
>   }
>   list_add(&instance->list, &sta2x11_instance_list);
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index 9fb3a568f0c51..b48b26bfa0edb 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -40,7 +40,7 @@ extern void swiotlb_init(int verbose);
>  int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
>  unsigned long swiotlb_size_or_default(void);
>  extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
> -extern int swiotlb_late_init_with_default_size(size_t default_size);
> +int swiotlb_init_late(size_t size);
>  extern void __init swiotlb_update_mem_attributes(void);
>  
>  phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 519e363097190..5f64b02fbb732 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -290,11 +290,9 @@ swiotlb_init(int verbose)
>   * initialize the swiotlb later using the slab allocator if needed.
>   * This should be just like above, but with some error catching.
>   */
> -int
> -swiotlb_late_init_with_default_size(size_t default_size)
> +int swiotlb_init_late(size_t size)
>  {
> - unsigned long nslabs =
> - ALIGN(default_size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
> + unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
>   unsigned long bytes;
>   unsigned char *vstart = NULL;
>   unsigned int order;
> 

Reviewed-by: Anshuman Khandual 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 03/11] swiotlb: simplify swiotlb_max_segment

2022-02-24 Thread Anshuman Khandual



On 2/22/22 9:05 PM, Christoph Hellwig wrote:
> Remove the bogus Xen override that was usually larger than the actual
> size and just calculate the value on demand.  Note that
> swiotlb_max_segment still doesn't make sense as an interface and should
> eventually be removed.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  drivers/xen/swiotlb-xen.c |  2 --
>  include/linux/swiotlb.h   |  1 -
>  kernel/dma/swiotlb.c      | 20 +++-----------------
>  3 files changed, 3 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index 47aebd98f52f5..485cd06ed39e7 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -202,7 +202,6 @@ int xen_swiotlb_init(void)
>   rc = swiotlb_late_init_with_tbl(start, nslabs);
>   if (rc)
>   return rc;
> - swiotlb_set_max_segment(PAGE_SIZE);
>   return 0;
>  error:
>   if (nslabs > 1024 && repeat--) {
> @@ -254,7 +253,6 @@ void __init xen_swiotlb_init_early(void)
>  
>   if (swiotlb_init_with_tbl(start, nslabs, true))
>   panic("Cannot allocate SWIOTLB buffer");
> - swiotlb_set_max_segment(PAGE_SIZE);
>  }
>  #endif /* CONFIG_X86 */
>  
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index f6c3638255d54..9fb3a568f0c51 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -164,7 +164,6 @@ static inline void swiotlb_adjust_size(unsigned long size)
>  #endif /* CONFIG_SWIOTLB */
>  
>  extern void swiotlb_print_info(void);
> -extern void swiotlb_set_max_segment(unsigned int);
>  
>  #ifdef CONFIG_DMA_RESTRICTED_POOL
>  struct page *swiotlb_alloc(struct device *dev, size_t size);
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 36fbf1181d285..519e363097190 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -75,12 +75,6 @@ struct io_tlb_mem io_tlb_default_mem;
>  
>  phys_addr_t swiotlb_unencrypted_base;
>  
> -/*
> - * Max segment that we can provide which (if pages are contingous) will
> - * not be bounced (unless SWIOTLB_FORCE is set).
> - */
> -static unsigned int max_segment;
> -
>  static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
>  
>  static int __init
> @@ -104,18 +98,12 @@ early_param("swiotlb", setup_io_tlb_npages);
>  
>  unsigned int swiotlb_max_segment(void)
>  {
> - return io_tlb_default_mem.nslabs ? max_segment : 0;
> + if (!io_tlb_default_mem.nslabs)
> + return 0;
> + return rounddown(io_tlb_default_mem.nslabs << IO_TLB_SHIFT, PAGE_SIZE);
>  }
>  EXPORT_SYMBOL_GPL(swiotlb_max_segment);
>  
> -void swiotlb_set_max_segment(unsigned int val)
> -{
> - if (swiotlb_force == SWIOTLB_FORCE)
> - max_segment = 1;
> - else
> - max_segment = rounddown(val, PAGE_SIZE);
> -}
> -
>  unsigned long swiotlb_size_or_default(void)
>  {
>   return default_nslabs << IO_TLB_SHIFT;
> @@ -267,7 +255,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long 
> nslabs, int verbose)
>  
>   if (verbose)
>   swiotlb_print_info();
> - swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
>   return 0;
>  }
>  
> @@ -368,7 +355,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long 
> nslabs)
>   swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
>  
>   swiotlb_print_info();
> - swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
>   return 0;
>  }
>  

Reviewed-by: Anshuman Khandual 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 02/11] swiotlb: make swiotlb_exit a no-op if SWIOTLB_FORCE is set

2022-02-24 Thread Anshuman Khandual



On 2/22/22 9:05 PM, Christoph Hellwig wrote:
> If force bouncing is enabled we can't release the bufffers.

typo

> 
> Signed-off-by: Christoph Hellwig 
> ---
>  kernel/dma/swiotlb.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index f1e7ea160b433..36fbf1181d285 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -378,6 +378,9 @@ void __init swiotlb_exit(void)
>   unsigned long tbl_vaddr;
>   size_t tbl_size, slots_size;
>  
> + if (swiotlb_force == SWIOTLB_FORCE)
> + return;
> +
>   if (!mem->nslabs)
>   return;
>  
> 

Reviewed-by: Anshuman Khandual 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 01/11] dma-direct: use is_swiotlb_active in dma_direct_map_page

2022-02-24 Thread Anshuman Khandual



On 2/22/22 9:05 PM, Christoph Hellwig wrote:
> Use the more specific is_swiotlb_active check instead of checking the
> global swiotlb_force variable.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  kernel/dma/direct.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
> index 4632b0f4f72eb..4dc16e08c7e1a 100644
> --- a/kernel/dma/direct.h
> +++ b/kernel/dma/direct.h
> @@ -91,7 +91,7 @@ static inline dma_addr_t dma_direct_map_page(struct device 
> *dev,
>   return swiotlb_map(dev, phys, size, dir, attrs);
>  
>   if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
> - if (swiotlb_force != SWIOTLB_NO_FORCE)
> + if (is_swiotlb_active(dev))
>   return swiotlb_map(dev, phys, size, dir, attrs);
>  
>   dev_WARN_ONCE(dev, 1,
> 

Reviewed-by: Anshuman Khandual 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 07/11] x86: remove the IOMMU table infrastructure

2022-02-24 Thread Anshuman Khandual


On 2/22/22 9:05 PM, Christoph Hellwig wrote:
> The IOMMU table tries to separate the different IOMMUs into different
> backends, but actually requires various cross calls.
> 
> Rewrite the code to do the generic swiotlb/swiotlb-xen setup directly
> in pci-dma.c and then just call into the IOMMU drivers.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/ia64/include/asm/iommu_table.h|   7 --
>  arch/x86/include/asm/dma-mapping.h |   1 -
>  arch/x86/include/asm/gart.h|   5 +-
>  arch/x86/include/asm/iommu.h   |   6 ++
>  arch/x86/include/asm/iommu_table.h | 102 --
>  arch/x86/include/asm/swiotlb.h |  30 ---
>  arch/x86/include/asm/xen/swiotlb-xen.h |   2 -
>  arch/x86/kernel/Makefile   |   2 -
>  arch/x86/kernel/amd_gart_64.c  |   5 +-
>  arch/x86/kernel/aperture_64.c  |  14 ++--
>  arch/x86/kernel/pci-dma.c  | 112 -
>  arch/x86/kernel/pci-iommu_table.c  |  77 -
>  arch/x86/kernel/pci-swiotlb.c  |  77 -
>  arch/x86/kernel/tboot.c|   1 -
>  arch/x86/kernel/vmlinux.lds.S  |  12 ---
>  arch/x86/xen/Makefile  |   2 -
>  arch/x86/xen/pci-swiotlb-xen.c |  96 -
>  drivers/iommu/amd/init.c   |   6 --
>  drivers/iommu/amd/iommu.c  |   5 +-
>  drivers/iommu/intel/dmar.c |   6 +-
>  include/linux/dmar.h   |   6 +-
>  21 files changed, 115 insertions(+), 459 deletions(-)
>  delete mode 100644 arch/ia64/include/asm/iommu_table.h
>  delete mode 100644 arch/x86/include/asm/iommu_table.h
>  delete mode 100644 arch/x86/include/asm/swiotlb.h
>  delete mode 100644 arch/x86/kernel/pci-iommu_table.c
>  delete mode 100644 arch/x86/kernel/pci-swiotlb.c
>  delete mode 100644 arch/x86/xen/pci-swiotlb-xen.c

checkpatch.pl has some warnings here.

WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#44: 
deleted file mode 100644

WARNING: Prefer [subsystem eg: netdev]_info([subsystem]dev, ... then 
dev_info(dev, ... then pr_info(...  to printk(KERN_INFO ...
#496: FILE: arch/x86/kernel/pci-dma.c:171:
+   printk(KERN_INFO "PCI-DMA: "

WARNING: quoted string split across lines
#497: FILE: arch/x86/kernel/pci-dma.c:172:
+   printk(KERN_INFO "PCI-DMA: "
+  "Using software bounce buffering for IO (SWIOTLB)\n");

ERROR: trailing whitespace
#881: FILE: drivers/iommu/amd/iommu.c:1837:
+^Iif (iommu_default_passthrough() || sme_me_mask) $

total: 1 errors, 3 warnings, 389 lines checked
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 10/11] swiotlb: merge swiotlb-xen initialization into swiotlb

2022-02-24 Thread Anshuman Khandual
On 2/22/22 9:05 PM, Christoph Hellwig wrote:
> Allow to pass a remap argument to the swiotlb initialization functions
> to handle the Xen/x86 remap case.  ARM/ARM64 never did any remapping
> from xen_swiotlb_fixup, so we don't even need that quirk.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/arm/xen/mm.c   |  23 +++---
>  arch/x86/include/asm/xen/page.h |   5 --
>  arch/x86/kernel/pci-dma.c   |  27 ---
>  arch/x86/pci/sta2x11-fixup.c|   2 +-
>  drivers/xen/swiotlb-xen.c   | 128 +---
>  include/linux/swiotlb.h |   7 +-
>  include/xen/arm/page.h  |   1 -
>  include/xen/swiotlb-xen.h   |   8 +-
>  kernel/dma/swiotlb.c| 120 +++---
>  9 files changed, 102 insertions(+), 219 deletions(-)

checkpatch.pl has some warnings here.

ERROR: trailing whitespace
#151: FILE: arch/x86/kernel/pci-dma.c:217:
+ $

WARNING: please, no spaces at the start of a line
#151: FILE: arch/x86/kernel/pci-dma.c:217:
+ $

total: 1 errors, 1 warnings, 470 lines checked
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/3] mm/page_alloc: Fix pageblock_order with HUGETLB_PAGE_SIZE_VARIABLE

2021-02-16 Thread Anshuman Khandual

On 2/12/21 3:09 PM, David Hildenbrand wrote:
> On 12.02.21 08:02, Anshuman Khandual wrote:
>>
>> On 2/11/21 2:07 PM, David Hildenbrand wrote:
>>> On 11.02.21 07:22, Anshuman Khandual wrote:
>>>> The following warning gets triggered while trying to boot a 64K page size
>>>> without THP config kernel on arm64 platform.
>>>>
>>>> WARNING: CPU: 5 PID: 124 at mm/vmstat.c:1080 
>>>> __fragmentation_index+0xa4/0xc0
>>>> Modules linked in:
>>>> CPU: 5 PID: 124 Comm: kswapd0 Not tainted 5.11.0-rc6-4-ga0ea7d62002 
>>>> #159
>>>> Hardware name: linux,dummy-virt (DT)
>>>> [    8.810673] pstate: 2045 (nzCv daif +PAN -UAO -TCO BTYPE=--)
>>>> [    8.811732] pc : __fragmentation_index+0xa4/0xc0
>>>> [    8.812555] lr : fragmentation_index+0xf8/0x138
>>>> [    8.813360] sp : 864079b0
>>>> [    8.813958] x29: 864079b0 x28: 0372
>>>> [    8.814901] x27: 7682 x26: 8000135b3948
>>>> [    8.815847] x25: 1fffe00010c80f48 x24: 
>>>> [    8.816805] x23:  x22: 000d
>>>> [    8.817764] x21: 0030 x20: 0005ffcb4d58
>>>> [    8.818712] x19: 000b x18: 
>>>> [    8.819656] x17:  x16: 
>>>> [    8.820613] x15:  x14: 8000114c6258
>>>> [    8.821560] x13: 6000bff969ba x12: 1fffe000bff969b9
>>>> [    8.822514] x11: 1fffe000bff969b9 x10: 6000bff969b9
>>>> [    8.823461] x9 : dfff8000 x8 : 0005ffcb4dcf
>>>> [    8.824415] x7 : 0001 x6 : 41b58ab3
>>>> [    8.825359] x5 : 600010c80f48 x4 : dfff8000
>>>> [    8.826313] x3 : 8000102be670 x2 : 0007
>>>> [    8.827259] x1 : 86407a60 x0 : 000d
>>>> [    8.828218] Call trace:
>>>> [    8.828667]  __fragmentation_index+0xa4/0xc0
>>>> [    8.829436]  fragmentation_index+0xf8/0x138
>>>> [    8.830194]  compaction_suitable+0x98/0xb8
>>>> [    8.830934]  wakeup_kcompactd+0xdc/0x128
>>>> [    8.831640]  balance_pgdat+0x71c/0x7a0
>>>> [    8.832327]  kswapd+0x31c/0x520
>>>> [    8.832902]  kthread+0x224/0x230
>>>> [    8.833491]  ret_from_fork+0x10/0x30
>>>> [    8.834150] ---[ end trace 472836f79c15516b ]---
>>>>
>>>> This warning comes from __fragmentation_index() when the requested order
>>>> is greater than MAX_ORDER.
>>>>
>>>> static int __fragmentation_index(unsigned int order,
>>>>   struct contig_page_info *info)
>>>> {
>>>>   unsigned long requested = 1UL << order;
>>>>
>>>>   if (WARN_ON_ONCE(order >= MAX_ORDER)) <= Triggered here
>>>>   return 0;
>>>>
>>>> Digging it further reveals that pageblock_order has been assigned a value
>>>> which is greater than MAX_ORDER failing the above check. But why this
>>>> happened ? Because HUGETLB_PAGE_ORDER for the given config on arm64 is
>>>> greater than MAX_ORDER.
>>>>
>>>> The solution involves enabling HUGETLB_PAGE_SIZE_VARIABLE which would make
>>>> pageblock_order a variable instead of constant HUGETLB_PAGE_ORDER. But that
>>>> change alone also did not really work as pageblock_order still got assigned
>>>> as HUGETLB_PAGE_ORDER in set_pageblock_order(). HUGETLB_PAGE_ORDER needs to
>>>> be less than MAX_ORDER for its appropriateness as pageblock_order otherwise
>>>> just fallback to MAX_ORDER - 1 as before. While here it also fixes a build
>>>> problem via type casting MAX_ORDER in rmem_cma_setup().
>>>
>>> I'm wondering, is there any real value in allowing FORCE_MAX_ZONEORDER to 
>>> be "11" with ARM64_64K_PAGES/ARM64_16K_PAGES?
>>
>> MAX_ORDER should be as high as would be required for the current config.
>> Unless THP is enabled, there is no need for it to be any higher than 11.
>> But I might be missing historical reasons around this as well. Probably
>> others from arm64 could help here.
> 
> Theoretically yes, practically no. If nobody cares about a configuration, no 
> need to make the code more complicated for that configuration.
> 
>>
>>>
>>> Meaning: are there any real use cases that actually build a kernel without 
>>> TRANSPARENT_HUGEPAGE

Re: [PATCH 3/3] dma-contiguous: Type cast MAX_ORDER as unsigned int

2021-02-11 Thread Anshuman Khandual


On 2/11/21 1:34 PM, Christoph Hellwig wrote:
> On Thu, Feb 11, 2021 at 11:52:11AM +0530, Anshuman Khandual wrote:
>> Type cast MAX_ORDER as unsigned int to fix the following build warning.
>>
>> In file included from ./include/linux/kernel.h:14,
>>  from ./include/asm-generic/bug.h:20,
>>  from ./arch/arm64/include/asm/bug.h:26,
>>  from ./include/linux/bug.h:5,
>>  from ./include/linux/mmdebug.h:5,
>>  from ./arch/arm64/include/asm/memory.h:166,
>>  from ./arch/arm64/include/asm/page.h:42,
>>  from kernel/dma/contiguous.c:46:
>> kernel/dma/contiguous.c: In function ‘rmem_cma_setup’:
>> ./include/linux/minmax.h:18:28: warning: comparison of distinct pointer
>> types lacks a cast
>>   (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
>> ^~
>> ./include/linux/minmax.h:32:4: note: in expansion of macro ‘__typecheck’
>>(__typecheck(x, y) && __no_side_effects(x, y))
>> ^~~
>> ./include/linux/minmax.h:42:24: note: in expansion of macro ‘__safe_cmp’
>>   __builtin_choose_expr(__safe_cmp(x, y), \
>> ^~
>> ./include/linux/minmax.h:58:19: note: in expansion of macro
>> ‘__careful_cmp’
>>  #define max(x, y) __careful_cmp(x, y, >)
>>^
>> kernel/dma/contiguous.c:402:35: note: in expansion of macro ‘max’
>>   phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
>>
>> Cc: Christoph Hellwig 
>> Cc: Marek Szyprowski 
>> Cc: Robin Murphy 
>> Cc: iommu@lists.linux-foundation.org
>> Cc: linux-ker...@vger.kernel.org
>> Signed-off-by: Anshuman Khandual 
>> ---
>>  kernel/dma/contiguous.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
>> index 3d63d91cba5c..1c2782349d71 100644
>> --- a/kernel/dma/contiguous.c
>> +++ b/kernel/dma/contiguous.c
>> @@ -399,7 +399,7 @@ static const struct reserved_mem_ops rmem_cma_ops = {
>>  
>>  static int __init rmem_cma_setup(struct reserved_mem *rmem)
>>  {
>> -phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
>> +phys_addr_t align = PAGE_SIZE << max((unsigned int)MAX_ORDER - 1, 
>> pageblock_order);
> 
> MAX_ORDER and pageblock_order should be the same type.  So either fix

Right.

> MAX_ORDER to be an unsigned constant, which would be fundamentally
> the right thing to do but might cause some fallout, or turn
> pageblock_order into an int, which is probably much easier as the stub
> define of it already has an integer type derived from MAX_ORDER as well.

Right, will change pageblock_order as 'int' which would be easier.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH 2/3] arm64/hugetlb: Enable HUGETLB_PAGE_SIZE_VARIABLE

2021-02-11 Thread Anshuman Khandual



On 2/11/21 1:31 PM, Christoph Hellwig wrote:
> On Thu, Feb 11, 2021 at 11:52:10AM +0530, Anshuman Khandual wrote:
>> MAX_ORDER which invariably depends on FORCE_MAX_ZONEORDER can be a variable
>> for a given page size, depending on whether TRANSPARENT_HUGEPAGE is enabled
>> or not. In certain page size and THP combinations HUGETLB_PAGE_ORDER can be
>> greater than MAX_ORDER, making it unusable as pageblock_order.
>>
>> This enables HUGETLB_PAGE_SIZE_VARIABLE making pageblock_order a variable
>> rather than the compile time constant HUGETLB_PAGE_ORDER which could break
>> MAX_ORDER rule for certain configurations.
>>
>> Cc: Catalin Marinas 
>> Cc: Will Deacon 
>> Cc: linux-arm-ker...@lists.infradead.org
>> Cc: linux-ker...@vger.kernel.org
>> Signed-off-by: Anshuman Khandual 
>> ---
>>  arch/arm64/Kconfig | 4 
>>  1 file changed, 4 insertions(+)
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index f39568b28ec1..8e3a5578f663 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -1909,6 +1909,10 @@ config ARCH_ENABLE_THP_MIGRATION
>>  def_bool y
>>  depends on TRANSPARENT_HUGEPAGE
>>  
>> +config HUGETLB_PAGE_SIZE_VARIABLE
> 
> Please move the definition of HUGETLB_PAGE_SIZE_VARIABLE to
> mm/Kconfig and select it from the arch Kconfigs instead of duplicating
> the definition.

Sure, will do.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/3] mm/page_alloc: Fix pageblock_order when HUGETLB_PAGE_ORDER >= MAX_ORDER

2021-02-11 Thread Anshuman Khandual



On 2/11/21 1:30 PM, Christoph Hellwig wrote:
>> -if (HPAGE_SHIFT > PAGE_SHIFT)
>> +if ((HPAGE_SHIFT > PAGE_SHIFT) && (HUGETLB_PAGE_ORDER < MAX_ORDER))
> 
> No need for the braces.

Will drop them.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/3] mm/page_alloc: Fix pageblock_order with HUGETLB_PAGE_SIZE_VARIABLE

2021-02-11 Thread Anshuman Khandual

On 2/11/21 2:07 PM, David Hildenbrand wrote:
> On 11.02.21 07:22, Anshuman Khandual wrote:
>> The following warning gets triggered while trying to boot a 64K page size
>> without THP config kernel on arm64 platform.
>>
>> WARNING: CPU: 5 PID: 124 at mm/vmstat.c:1080 __fragmentation_index+0xa4/0xc0
>> Modules linked in:
>> CPU: 5 PID: 124 Comm: kswapd0 Not tainted 5.11.0-rc6-4-ga0ea7d62002 #159
>> Hardware name: linux,dummy-virt (DT)
>> [    8.810673] pstate: 2045 (nzCv daif +PAN -UAO -TCO BTYPE=--)
>> [    8.811732] pc : __fragmentation_index+0xa4/0xc0
>> [    8.812555] lr : fragmentation_index+0xf8/0x138
>> [    8.813360] sp : 864079b0
>> [    8.813958] x29: 864079b0 x28: 0372
>> [    8.814901] x27: 7682 x26: 8000135b3948
>> [    8.815847] x25: 1fffe00010c80f48 x24: 
>> [    8.816805] x23:  x22: 000d
>> [    8.817764] x21: 0030 x20: 0005ffcb4d58
>> [    8.818712] x19: 000b x18: 
>> [    8.819656] x17:  x16: 
>> [    8.820613] x15:  x14: 8000114c6258
>> [    8.821560] x13: 6000bff969ba x12: 1fffe000bff969b9
>> [    8.822514] x11: 1fffe000bff969b9 x10: 6000bff969b9
>> [    8.823461] x9 : dfff8000 x8 : 0005ffcb4dcf
>> [    8.824415] x7 : 0001 x6 : 41b58ab3
>> [    8.825359] x5 : 600010c80f48 x4 : dfff8000
>> [    8.826313] x3 : 8000102be670 x2 : 0007
>> [    8.827259] x1 : 86407a60 x0 : 000d
>> [    8.828218] Call trace:
>> [    8.828667]  __fragmentation_index+0xa4/0xc0
>> [    8.829436]  fragmentation_index+0xf8/0x138
>> [    8.830194]  compaction_suitable+0x98/0xb8
>> [    8.830934]  wakeup_kcompactd+0xdc/0x128
>> [    8.831640]  balance_pgdat+0x71c/0x7a0
>> [    8.832327]  kswapd+0x31c/0x520
>> [    8.832902]  kthread+0x224/0x230
>> [    8.833491]  ret_from_fork+0x10/0x30
>> [    8.834150] ---[ end trace 472836f79c15516b ]---
>>
>> This warning comes from __fragmentation_index() when the requested order
>> is greater than MAX_ORDER.
>>
>> static int __fragmentation_index(unsigned int order,
>>  struct contig_page_info *info)
>> {
>>  unsigned long requested = 1UL << order;
>>
>>  if (WARN_ON_ONCE(order >= MAX_ORDER)) <= Triggered here
>>  return 0;
>>
>> Digging it further reveals that pageblock_order has been assigned a value
>> which is greater than MAX_ORDER, failing the above check. But why did this
>> happen? Because HUGETLB_PAGE_ORDER for the given config on arm64 is
>> greater than MAX_ORDER.
>>
>> The solution involves enabling HUGETLB_PAGE_SIZE_VARIABLE which would make
>> pageblock_order a variable instead of constant HUGETLB_PAGE_ORDER. But that
>> change alone also did not really work as pageblock_order still got assigned
>> as HUGETLB_PAGE_ORDER in set_pageblock_order(). HUGETLB_PAGE_ORDER needs to
>> be less than MAX_ORDER for its appropriateness as pageblock_order otherwise
>> just fallback to MAX_ORDER - 1 as before. While here it also fixes a build
>> problem via type casting MAX_ORDER in rmem_cma_setup().
> 
> I'm wondering, is there any real value in allowing FORCE_MAX_ZONEORDER to be 
> "11" with ARM64_64K_PAGES/ARM64_16K_PAGES?

MAX_ORDER should be as high as would be required for the current config.
Unless THP is enabled, there is no need for it to be any higher than 11.
But I might be missing historical reasons around this as well. Probably
others from arm64 could help here.

> 
> Meaning: are there any real use cases that actually build a kernel without 
> TRANSPARENT_HUGEPAGE and with ARM64_64K_PAGES/ARM64_16K_PAGES?

THP is always optional. Besides, kernel builds without THP should always
be supported. Assuming that all builds will have THP enabled, might not
be accurate.

> 
> As builds are essentially broken, I assume this is not that relevant? Or how 
> long has it been broken?

Git blame shows that it's been there for some time now. But how does
that make this irrelevant? A problem should be fixed nonetheless.

> 
> It might be easier to just drop the "TRANSPARENT_HUGEPAGE" part from the 
> FORCE_MAX_ZONEORDER config.
> 

Not sure if it would be a good idea to unnecessarily have larger MAX_ORDER
value for a given config. But I might be missing other contexts here.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH 3/3] dma-contiguous: Type cast MAX_ORDER as unsigned int

2021-02-10 Thread Anshuman Khandual
Type cast MAX_ORDER as unsigned int to fix the following build warning.

In file included from ./include/linux/kernel.h:14,
 from ./include/asm-generic/bug.h:20,
 from ./arch/arm64/include/asm/bug.h:26,
 from ./include/linux/bug.h:5,
 from ./include/linux/mmdebug.h:5,
 from ./arch/arm64/include/asm/memory.h:166,
 from ./arch/arm64/include/asm/page.h:42,
 from kernel/dma/contiguous.c:46:
kernel/dma/contiguous.c: In function ‘rmem_cma_setup’:
./include/linux/minmax.h:18:28: warning: comparison of distinct pointer
types lacks a cast
  (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
^~
./include/linux/minmax.h:32:4: note: in expansion of macro ‘__typecheck’
   (__typecheck(x, y) && __no_side_effects(x, y))
^~~
./include/linux/minmax.h:42:24: note: in expansion of macro ‘__safe_cmp’
  __builtin_choose_expr(__safe_cmp(x, y), \
^~
./include/linux/minmax.h:58:19: note: in expansion of macro
‘__careful_cmp’
 #define max(x, y) __careful_cmp(x, y, >)
   ^
kernel/dma/contiguous.c:402:35: note: in expansion of macro ‘max’
  phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);

Cc: Christoph Hellwig 
Cc: Marek Szyprowski 
Cc: Robin Murphy 
Cc: iommu@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Anshuman Khandual 
---
 kernel/dma/contiguous.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 3d63d91cba5c..1c2782349d71 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -399,7 +399,7 @@ static const struct reserved_mem_ops rmem_cma_ops = {
 
 static int __init rmem_cma_setup(struct reserved_mem *rmem)
 {
-   phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
+   phys_addr_t align = PAGE_SIZE << max((unsigned int)MAX_ORDER - 1, 
pageblock_order);
phys_addr_t mask = align - 1;
unsigned long node = rmem->fdt_node;
bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH 2/3] arm64/hugetlb: Enable HUGETLB_PAGE_SIZE_VARIABLE

2021-02-10 Thread Anshuman Khandual
MAX_ORDER which invariably depends on FORCE_MAX_ZONEORDER can be a variable
for a given page size, depending on whether TRANSPARENT_HUGEPAGE is enabled
or not. In certain page size and THP combinations HUGETLB_PAGE_ORDER can be
greater than MAX_ORDER, making it unusable as pageblock_order.

This enables HUGETLB_PAGE_SIZE_VARIABLE making pageblock_order a variable
rather than the compile time constant HUGETLB_PAGE_ORDER which could break
MAX_ORDER rule for certain configurations.

Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Anshuman Khandual 
---
 arch/arm64/Kconfig | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f39568b28ec1..8e3a5578f663 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1909,6 +1909,10 @@ config ARCH_ENABLE_THP_MIGRATION
def_bool y
depends on TRANSPARENT_HUGEPAGE
 
+config HUGETLB_PAGE_SIZE_VARIABLE
+   def_bool y
+   depends on HUGETLB_PAGE
+
 menu "Power management options"
 
 source "kernel/power/Kconfig"
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/3] mm/page_alloc: Fix pageblock_order when HUGETLB_PAGE_ORDER >= MAX_ORDER

2021-02-10 Thread Anshuman Khandual
With HUGETLB_PAGE_SIZE_VARIABLE enabled, pageblock_order cannot be assigned
as HUGETLB_PAGE_ORDER when it is greater than or equal to MAX_ORDER during
set_pageblock_order(). Otherwise the following warning is triggered during
boot as detected on an arm64 platform.

WARNING: CPU: 5 PID: 124 at mm/vmstat.c:1080 __fragmentation_index+0xa4/0xc0
Modules linked in:
CPU: 5 PID: 124 Comm: kswapd0 Not tainted 5.11.0-rc6-4-ga0ea7d62002 #159
Hardware name: linux,dummy-virt (DT)
[8.810673] pstate: 2045 (nzCv daif +PAN -UAO -TCO BTYPE=--)
[8.811732] pc : __fragmentation_index+0xa4/0xc0
[8.812555] lr : fragmentation_index+0xf8/0x138
[8.813360] sp : 864079b0
[8.813958] x29: 864079b0 x28: 0372
[8.814901] x27: 7682 x26: 8000135b3948
[8.815847] x25: 1fffe00010c80f48 x24: 
[8.816805] x23:  x22: 000d
[8.817764] x21: 0030 x20: 0005ffcb4d58
[8.818712] x19: 000b x18: 
[8.819656] x17:  x16: 
[8.820613] x15:  x14: 8000114c6258
[8.821560] x13: 6000bff969ba x12: 1fffe000bff969b9
[8.822514] x11: 1fffe000bff969b9 x10: 6000bff969b9
[8.823461] x9 : dfff8000 x8 : 0005ffcb4dcf
[8.824415] x7 : 0001 x6 : 41b58ab3
[8.825359] x5 : 600010c80f48 x4 : dfff8000
[8.826313] x3 : 8000102be670 x2 : 0007
[8.827259] x1 : 86407a60 x0 : 000d
[8.828218] Call trace:
[8.828667]  __fragmentation_index+0xa4/0xc0
[8.829436]  fragmentation_index+0xf8/0x138
[8.830194]  compaction_suitable+0x98/0xb8
[8.830934]  wakeup_kcompactd+0xdc/0x128
[8.831640]  balance_pgdat+0x71c/0x7a0
[8.832327]  kswapd+0x31c/0x520
[8.832902]  kthread+0x224/0x230
[8.833491]  ret_from_fork+0x10/0x30
[8.834150] ---[ end trace 472836f79c15516b ]---

The above warning happens because pageblock_order exceeds MAX_ORDER, caused
by large HUGETLB_PAGE_ORDER on certain platforms like arm64. Let's prevent
the scenario by first checking HUGETLB_PAGE_ORDER against MAX_ORDER, before
its assignment as pageblock_order.

Cc: Andrew Morton 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Anshuman Khandual 
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 519a60d5b6f7..36473f2fa683 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6798,7 +6798,7 @@ void __init set_pageblock_order(void)
if (pageblock_order)
return;
 
-   if (HPAGE_SHIFT > PAGE_SHIFT)
+   if ((HPAGE_SHIFT > PAGE_SHIFT) && (HUGETLB_PAGE_ORDER < MAX_ORDER))
order = HUGETLB_PAGE_ORDER;
else
order = MAX_ORDER - 1;
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/3] mm/page_alloc: Fix pageblock_order with HUGETLB_PAGE_SIZE_VARIABLE

2021-02-10 Thread Anshuman Khandual
The following warning gets triggered while trying to boot a kernel
configured with 64K page size and without THP on an arm64 platform.

WARNING: CPU: 5 PID: 124 at mm/vmstat.c:1080 __fragmentation_index+0xa4/0xc0
Modules linked in:
CPU: 5 PID: 124 Comm: kswapd0 Not tainted 5.11.0-rc6-4-ga0ea7d62002 #159
Hardware name: linux,dummy-virt (DT)
[8.810673] pstate: 2045 (nzCv daif +PAN -UAO -TCO BTYPE=--)
[8.811732] pc : __fragmentation_index+0xa4/0xc0
[8.812555] lr : fragmentation_index+0xf8/0x138
[8.813360] sp : 864079b0
[8.813958] x29: 864079b0 x28: 0372
[8.814901] x27: 7682 x26: 8000135b3948
[8.815847] x25: 1fffe00010c80f48 x24: 
[8.816805] x23:  x22: 000d
[8.817764] x21: 0030 x20: 0005ffcb4d58
[8.818712] x19: 000b x18: 
[8.819656] x17:  x16: 
[8.820613] x15:  x14: 8000114c6258
[8.821560] x13: 6000bff969ba x12: 1fffe000bff969b9
[8.822514] x11: 1fffe000bff969b9 x10: 6000bff969b9
[8.823461] x9 : dfff8000 x8 : 0005ffcb4dcf
[8.824415] x7 : 0001 x6 : 41b58ab3
[8.825359] x5 : 600010c80f48 x4 : dfff8000
[8.826313] x3 : 8000102be670 x2 : 0007
[8.827259] x1 : 86407a60 x0 : 000d
[8.828218] Call trace:
[8.828667]  __fragmentation_index+0xa4/0xc0
[8.829436]  fragmentation_index+0xf8/0x138
[8.830194]  compaction_suitable+0x98/0xb8
[8.830934]  wakeup_kcompactd+0xdc/0x128
[8.831640]  balance_pgdat+0x71c/0x7a0
[8.832327]  kswapd+0x31c/0x520
[8.832902]  kthread+0x224/0x230
[8.833491]  ret_from_fork+0x10/0x30
[8.834150] ---[ end trace 472836f79c15516b ]---

This warning comes from __fragmentation_index() when the requested order
is greater than or equal to MAX_ORDER.

static int __fragmentation_index(unsigned int order,
 struct contig_page_info *info)
{
unsigned long requested = 1UL << order;

if (WARN_ON_ONCE(order >= MAX_ORDER)) <= Triggered here
return 0;

Digging further reveals that pageblock_order has been assigned a value
which is greater than MAX_ORDER, tripping the above check. Why did this
happen? Because HUGETLB_PAGE_ORDER for the given config on arm64 is
greater than MAX_ORDER.

The solution involves enabling HUGETLB_PAGE_SIZE_VARIABLE, which makes
pageblock_order a variable instead of the constant HUGETLB_PAGE_ORDER. But
that change alone did not really work, as pageblock_order still got assigned
as HUGETLB_PAGE_ORDER in set_pageblock_order(). HUGETLB_PAGE_ORDER needs to
be less than MAX_ORDER to be appropriate as pageblock_order; otherwise just
fall back to MAX_ORDER - 1 as before. While here, this series also fixes a
build warning by type casting MAX_ORDER in rmem_cma_setup().

This series applies on v5.11-rc7 and has been lightly tested on arm64. I am
looking for some early feedback, particularly with respect to concerns about
enabling HUGETLB_PAGE_SIZE_VARIABLE on a platform where the hugetlb page
size is config dependent but not really a runtime variable. Even though it
appears that HUGETLB_PAGE_SIZE_VARIABLE is used only while computing the
pageblock_order, could there be other implications?

Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Robin Murphy 
Cc: Marek Szyprowski 
Cc: Christoph Hellwig 
Cc: Andrew Morton 
Cc: David Hildenbrand 
Cc: Mark Rutland 
Cc: linux-arm-ker...@lists.infradead.org
Cc: iommu@lists.linux-foundation.org
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Changes in V1:

- Rebased on 5.11-rc7
- Dropped the RFC

Changes in RFC:

https://lore.kernel.org/linux-mm/1612422084-30429-1-git-send-email-anshuman.khand...@arm.com/

Anshuman Khandual (3):
  mm/page_alloc: Fix pageblock_order when HUGETLB_PAGE_ORDER >= MAX_ORDER
  arm64/hugetlb: Enable HUGETLB_PAGE_SIZE_VARIABLE
  dma-contiguous: Type cast MAX_ORDER as unsigned int

 arch/arm64/Kconfig  | 4 
 kernel/dma/contiguous.c | 2 +-
 mm/page_alloc.c | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC 0/3] mm/page_alloc: Fix pageblock_order with HUGETLB_PAGE_SIZE_VARIABLE

2021-02-07 Thread Anshuman Khandual



On 2/4/21 12:31 PM, Anshuman Khandual wrote:
> The following warning gets triggered while trying to boot a 64K page size
> without THP config kernel on arm64 platform.
> 
> WARNING: CPU: 5 PID: 124 at mm/vmstat.c:1080 __fragmentation_index+0xa4/0xc0
> Modules linked in:
> CPU: 5 PID: 124 Comm: kswapd0 Not tainted 5.11.0-rc6-4-ga0ea7d62002 #159
> Hardware name: linux,dummy-virt (DT)
> [8.810673] pstate: 2045 (nzCv daif +PAN -UAO -TCO BTYPE=--)
> [8.811732] pc : __fragmentation_index+0xa4/0xc0
> [8.812555] lr : fragmentation_index+0xf8/0x138
> [8.813360] sp : 864079b0
> [8.813958] x29: 864079b0 x28: 0372
> [8.814901] x27: 7682 x26: 8000135b3948
> [8.815847] x25: 1fffe00010c80f48 x24: 
> [8.816805] x23:  x22: 000d
> [8.817764] x21: 0030 x20: 0005ffcb4d58
> [8.818712] x19: 000b x18: 
> [8.819656] x17:  x16: 
> [8.820613] x15:  x14: 8000114c6258
> [8.821560] x13: 6000bff969ba x12: 1fffe000bff969b9
> [8.822514] x11: 1fffe000bff969b9 x10: 6000bff969b9
> [8.823461] x9 : dfff8000 x8 : 0005ffcb4dcf
> [8.824415] x7 : 0001 x6 : 41b58ab3
> [8.825359] x5 : 600010c80f48 x4 : dfff8000
> [8.826313] x3 : 8000102be670 x2 : 0007
> [8.827259] x1 : 86407a60 x0 : 000d
> [8.828218] Call trace:
> [8.828667]  __fragmentation_index+0xa4/0xc0
> [8.829436]  fragmentation_index+0xf8/0x138
> [8.830194]  compaction_suitable+0x98/0xb8
> [8.830934]  wakeup_kcompactd+0xdc/0x128
> [8.831640]  balance_pgdat+0x71c/0x7a0
> [8.832327]  kswapd+0x31c/0x520
> [8.832902]  kthread+0x224/0x230
> [8.833491]  ret_from_fork+0x10/0x30
> [8.834150] ---[ end trace 472836f79c15516b ]---
> 
> This warning comes from __fragmentation_index() when the requested order
> is greater than MAX_ORDER.
> 
> static int __fragmentation_index(unsigned int order,
>struct contig_page_info *info)
> {
> unsigned long requested = 1UL << order;
> 
> if (WARN_ON_ONCE(order >= MAX_ORDER)) <= Triggered here
> return 0;
> 
> Digging it further reveals that pageblock_order has been assigned a value
> which is greater than MAX_ORDER failing the above check. But why this
> happened ? Because HUGETLB_PAGE_ORDER for the given config on arm64 is
> greater than MAX_ORDER.
> 
> The solution involves enabling HUGETLB_PAGE_SIZE_VARIABLE which would make
> pageblock_order a variable instead of constant HUGETLB_PAGE_ORDER. But that
> change alone also did not really work as pageblock_order still got assigned
> as HUGETLB_PAGE_ORDER in set_pageblock_order(). HUGETLB_PAGE_ORDER needs to
> be less than MAX_ORDER for its appropriateness as pageblock_order otherwise
> just fallback to MAX_ORDER - 1 as before. While here it also fixes a build
> problem via type casting MAX_ORDER in rmem_cma_setup().
> 
> This series applies in v5.11-rc6 and has been slightly tested on arm64. But
> looking for some early feedbacks particularly with respect to concerns in
> subscribing HUGETLB_PAGE_SIZE_VARIABLE on a platform where the hugetlb page
> size is config dependent but not really a runtime variable. Even though it
> appears that HUGETLB_PAGE_SIZE_VARIABLE is used only while computing the
> pageblock_order, could there be other implications ?
> 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Robin Murphy 
> Cc: Marek Szyprowski 
> Cc: Christoph Hellwig 
> Cc: Andrew Morton 
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: iommu@lists.linux-foundation.org
> Cc: linux...@kvack.org
> Cc: linux-ker...@vger.kernel.org

Probably missed some more folks, adding them here.

+ Michal Hocko 
+ Vlastimil Babka 
+ Mike Kravetz 
+ Matthew Wilcox 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC 2/3] arm64/hugetlb: Enable HUGETLB_PAGE_SIZE_VARIABLE

2021-02-05 Thread Anshuman Khandual



On 2/5/21 1:50 PM, David Hildenbrand wrote:
> On 04.02.21 08:01, Anshuman Khandual wrote:
>> MAX_ORDER which invariably depends on FORCE_MAX_ZONEORDER can be a variable
>> for a given page size, depending on whether TRANSPARENT_HUGEPAGE is enabled
>> or not. In certain page size and THP combinations HUGETLB_PAGE_ORDER can be
>> greater than MAX_ORDER, making it unusable as pageblock_order.
> 
> Just so I understand correctly, this does not imply that we have THP that 
> exceed the pageblock size / MAX_ORDER size, correct?

Correct. MAX_ORDER gets incremented when THP is enabled.

config FORCE_MAX_ZONEORDER
int
default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE)
default "11"
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RFC 3/3] dma-contiguous: Type cast MAX_ORDER as unsigned int

2021-02-03 Thread Anshuman Khandual
Type cast MAX_ORDER as unsigned int to fix the following build warning.

In file included from ./include/linux/kernel.h:14,
 from ./include/asm-generic/bug.h:20,
 from ./arch/arm64/include/asm/bug.h:26,
 from ./include/linux/bug.h:5,
 from ./include/linux/mmdebug.h:5,
 from ./arch/arm64/include/asm/memory.h:166,
 from ./arch/arm64/include/asm/page.h:42,
 from kernel/dma/contiguous.c:46:
kernel/dma/contiguous.c: In function ‘rmem_cma_setup’:
./include/linux/minmax.h:18:28: warning: comparison of distinct pointer
types lacks a cast
  (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
^~
./include/linux/minmax.h:32:4: note: in expansion of macro ‘__typecheck’
   (__typecheck(x, y) && __no_side_effects(x, y))
^~~
./include/linux/minmax.h:42:24: note: in expansion of macro ‘__safe_cmp’
  __builtin_choose_expr(__safe_cmp(x, y), \
^~
./include/linux/minmax.h:58:19: note: in expansion of macro
‘__careful_cmp’
 #define max(x, y) __careful_cmp(x, y, >)
   ^
kernel/dma/contiguous.c:402:35: note: in expansion of macro ‘max’
  phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);

Cc: Christoph Hellwig 
Cc: Marek Szyprowski 
Cc: Robin Murphy 
Cc: iommu@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Anshuman Khandual 
---
 kernel/dma/contiguous.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 3d63d91cba5c..1c2782349d71 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -399,7 +399,7 @@ static const struct reserved_mem_ops rmem_cma_ops = {
 
 static int __init rmem_cma_setup(struct reserved_mem *rmem)
 {
-   phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
+   phys_addr_t align = PAGE_SIZE << max((unsigned int)MAX_ORDER - 1, pageblock_order);
phys_addr_t mask = align - 1;
unsigned long node = rmem->fdt_node;
bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[RFC 2/3] arm64/hugetlb: Enable HUGETLB_PAGE_SIZE_VARIABLE

2021-02-03 Thread Anshuman Khandual
MAX_ORDER which invariably depends on FORCE_MAX_ZONEORDER can be a variable
for a given page size, depending on whether TRANSPARENT_HUGEPAGE is enabled
or not. In certain page size and THP combinations HUGETLB_PAGE_ORDER can be
greater than MAX_ORDER, making it unusable as pageblock_order.

This enables HUGETLB_PAGE_SIZE_VARIABLE, making pageblock_order a variable
rather than the compile time constant HUGETLB_PAGE_ORDER, which could break
the MAX_ORDER rule for certain configurations.

Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Anshuman Khandual 
---
 arch/arm64/Kconfig | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 175914f2f340..c4acf8230f20 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1918,6 +1918,10 @@ config ARCH_ENABLE_THP_MIGRATION
def_bool y
depends on TRANSPARENT_HUGEPAGE
 
+config HUGETLB_PAGE_SIZE_VARIABLE
+   def_bool y
+   depends on HUGETLB_PAGE
+
 menu "Power management options"
 
 source "kernel/power/Kconfig"
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RFC 1/3] mm/page_alloc: Fix pageblock_order when HUGETLB_PAGE_ORDER >= MAX_ORDER

2021-02-03 Thread Anshuman Khandual
With HUGETLB_PAGE_SIZE_VARIABLE enabled, pageblock_order cannot be assigned
as HUGETLB_PAGE_ORDER when it is greater than or equal to MAX_ORDER during
set_pageblock_order(). Otherwise the following warning is triggered during
boot, as detected on an arm64 platform.

WARNING: CPU: 5 PID: 124 at mm/vmstat.c:1080 __fragmentation_index+0xa4/0xc0
Modules linked in:
CPU: 5 PID: 124 Comm: kswapd0 Not tainted 5.11.0-rc6-4-ga0ea7d62002 #159
Hardware name: linux,dummy-virt (DT)
[8.810673] pstate: 2045 (nzCv daif +PAN -UAO -TCO BTYPE=--)
[8.811732] pc : __fragmentation_index+0xa4/0xc0
[8.812555] lr : fragmentation_index+0xf8/0x138
[8.813360] sp : 864079b0
[8.813958] x29: 864079b0 x28: 0372
[8.814901] x27: 7682 x26: 8000135b3948
[8.815847] x25: 1fffe00010c80f48 x24: 
[8.816805] x23:  x22: 000d
[8.817764] x21: 0030 x20: 0005ffcb4d58
[8.818712] x19: 000b x18: 
[8.819656] x17:  x16: 
[8.820613] x15:  x14: 8000114c6258
[8.821560] x13: 6000bff969ba x12: 1fffe000bff969b9
[8.822514] x11: 1fffe000bff969b9 x10: 6000bff969b9
[8.823461] x9 : dfff8000 x8 : 0005ffcb4dcf
[8.824415] x7 : 0001 x6 : 41b58ab3
[8.825359] x5 : 600010c80f48 x4 : dfff8000
[8.826313] x3 : 8000102be670 x2 : 0007
[8.827259] x1 : 86407a60 x0 : 000d
[8.828218] Call trace:
[8.828667]  __fragmentation_index+0xa4/0xc0
[8.829436]  fragmentation_index+0xf8/0x138
[8.830194]  compaction_suitable+0x98/0xb8
[8.830934]  wakeup_kcompactd+0xdc/0x128
[8.831640]  balance_pgdat+0x71c/0x7a0
[8.832327]  kswapd+0x31c/0x520
[8.832902]  kthread+0x224/0x230
[8.833491]  ret_from_fork+0x10/0x30
[8.834150] ---[ end trace 472836f79c15516b ]---

The above warning happens because pageblock_order exceeds MAX_ORDER, which
is caused by a large HUGETLB_PAGE_ORDER on certain platforms like arm64.
Let's prevent this scenario by first checking HUGETLB_PAGE_ORDER against
MAX_ORDER before assigning it as pageblock_order.

Cc: Andrew Morton 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Anshuman Khandual 
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 519a60d5b6f7..36473f2fa683 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6798,7 +6798,7 @@ void __init set_pageblock_order(void)
if (pageblock_order)
return;
 
-   if (HPAGE_SHIFT > PAGE_SHIFT)
+   if ((HPAGE_SHIFT > PAGE_SHIFT) && (HUGETLB_PAGE_ORDER < MAX_ORDER))
order = HUGETLB_PAGE_ORDER;
else
order = MAX_ORDER - 1;
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RFC 0/3] mm/page_alloc: Fix pageblock_order with HUGETLB_PAGE_SIZE_VARIABLE

2021-02-03 Thread Anshuman Khandual
The following warning gets triggered while trying to boot a kernel
configured with 64K page size and without THP on an arm64 platform.

WARNING: CPU: 5 PID: 124 at mm/vmstat.c:1080 __fragmentation_index+0xa4/0xc0
Modules linked in:
CPU: 5 PID: 124 Comm: kswapd0 Not tainted 5.11.0-rc6-4-ga0ea7d62002 #159
Hardware name: linux,dummy-virt (DT)
[8.810673] pstate: 2045 (nzCv daif +PAN -UAO -TCO BTYPE=--)
[8.811732] pc : __fragmentation_index+0xa4/0xc0
[8.812555] lr : fragmentation_index+0xf8/0x138
[8.813360] sp : 864079b0
[8.813958] x29: 864079b0 x28: 0372
[8.814901] x27: 7682 x26: 8000135b3948
[8.815847] x25: 1fffe00010c80f48 x24: 
[8.816805] x23:  x22: 000d
[8.817764] x21: 0030 x20: 0005ffcb4d58
[8.818712] x19: 000b x18: 
[8.819656] x17:  x16: 
[8.820613] x15:  x14: 8000114c6258
[8.821560] x13: 6000bff969ba x12: 1fffe000bff969b9
[8.822514] x11: 1fffe000bff969b9 x10: 6000bff969b9
[8.823461] x9 : dfff8000 x8 : 0005ffcb4dcf
[8.824415] x7 : 0001 x6 : 41b58ab3
[8.825359] x5 : 600010c80f48 x4 : dfff8000
[8.826313] x3 : 8000102be670 x2 : 0007
[8.827259] x1 : 86407a60 x0 : 000d
[8.828218] Call trace:
[8.828667]  __fragmentation_index+0xa4/0xc0
[8.829436]  fragmentation_index+0xf8/0x138
[8.830194]  compaction_suitable+0x98/0xb8
[8.830934]  wakeup_kcompactd+0xdc/0x128
[8.831640]  balance_pgdat+0x71c/0x7a0
[8.832327]  kswapd+0x31c/0x520
[8.832902]  kthread+0x224/0x230
[8.833491]  ret_from_fork+0x10/0x30
[8.834150] ---[ end trace 472836f79c15516b ]---

This warning comes from __fragmentation_index() when the requested order
is greater than or equal to MAX_ORDER.

static int __fragmentation_index(unsigned int order,
 struct contig_page_info *info)
{
unsigned long requested = 1UL << order;

if (WARN_ON_ONCE(order >= MAX_ORDER)) <= Triggered here
return 0;

Digging further reveals that pageblock_order has been assigned a value
which is greater than MAX_ORDER, tripping the above check. Why did this
happen? Because HUGETLB_PAGE_ORDER for the given config on arm64 is
greater than MAX_ORDER.

The solution involves enabling HUGETLB_PAGE_SIZE_VARIABLE, which makes
pageblock_order a variable instead of the constant HUGETLB_PAGE_ORDER. But
that change alone did not really work, as pageblock_order still got assigned
as HUGETLB_PAGE_ORDER in set_pageblock_order(). HUGETLB_PAGE_ORDER needs to
be less than MAX_ORDER to be appropriate as pageblock_order; otherwise just
fall back to MAX_ORDER - 1 as before. While here, this series also fixes a
build warning by type casting MAX_ORDER in rmem_cma_setup().

This series applies on v5.11-rc6 and has been lightly tested on arm64. I am
looking for some early feedback, particularly with respect to concerns about
enabling HUGETLB_PAGE_SIZE_VARIABLE on a platform where the hugetlb page
size is config dependent but not really a runtime variable. Even though it
appears that HUGETLB_PAGE_SIZE_VARIABLE is used only while computing the
pageblock_order, could there be other implications?

Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Robin Murphy 
Cc: Marek Szyprowski 
Cc: Christoph Hellwig 
Cc: Andrew Morton 
Cc: linux-arm-ker...@lists.infradead.org
Cc: iommu@lists.linux-foundation.org
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Anshuman Khandual (3):
  mm/page_alloc: Fix pageblock_order when HUGETLB_PAGE_ORDER >= MAX_ORDER
  arm64/hugetlb: Enable HUGETLB_PAGE_SIZE_VARIABLE
  dma-contiguous: Type cast MAX_ORDER as unsigned int

 arch/arm64/Kconfig  | 4 
 kernel/dma/contiguous.c | 2 +-
 mm/page_alloc.c | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH V3 1/2] mm: Replace all open encodings for NUMA_NO_NODE

2018-12-18 Thread Anshuman Khandual
At present there are multiple places where an invalid node number is encoded
as -1. Even though this is implicitly understood, it is always better to use
a macro there. Replace these open encodings for an invalid node number with
the global macro NUMA_NO_NODE. This helps remove NUMA related assumptions like
'invalid node' from various places, redirecting them to a common definition.

Reviewed-by: David Hildenbrand 
Acked-by: Jeff Kirsher [ixgbe]
Acked-by: Jens Axboe   [mtip32xx]
Acked-by: Vinod Koul  [dmaengine.c]
Acked-by: Michael Ellerman [powerpc]
Acked-by: Doug Ledford [drivers/infiniband]
Signed-off-by: Anshuman Khandual 
---
 arch/alpha/include/asm/topology.h |  3 ++-
 arch/ia64/kernel/numa.c   |  2 +-
 arch/ia64/mm/discontig.c  |  6 +++---
 arch/powerpc/include/asm/pci-bridge.h |  3 ++-
 arch/powerpc/kernel/paca.c|  3 ++-
 arch/powerpc/kernel/pci-common.c  |  3 ++-
 arch/powerpc/mm/numa.c| 14 +++---
 arch/powerpc/platforms/powernv/memtrace.c |  5 +++--
 arch/sparc/kernel/pci_fire.c  |  3 ++-
 arch/sparc/kernel/pci_schizo.c|  3 ++-
 arch/sparc/kernel/psycho_common.c |  3 ++-
 arch/sparc/kernel/sbus.c  |  3 ++-
 arch/sparc/mm/init_64.c   |  6 +++---
 arch/x86/include/asm/pci.h|  3 ++-
 arch/x86/kernel/apic/x2apic_uv_x.c|  7 ---
 arch/x86/kernel/smpboot.c |  3 ++-
 drivers/block/mtip32xx/mtip32xx.c |  5 +++--
 drivers/dma/dmaengine.c   |  4 +++-
 drivers/infiniband/hw/hfi1/affinity.c |  3 ++-
 drivers/infiniband/hw/hfi1/init.c |  3 ++-
 drivers/iommu/dmar.c  |  5 +++--
 drivers/iommu/intel-iommu.c   |  3 ++-
 drivers/misc/sgi-xp/xpc_uv.c  |  3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  5 +++--
 include/linux/device.h|  2 +-
 init/init_task.c  |  3 ++-
 kernel/kthread.c  |  3 ++-
 kernel/sched/fair.c   | 15 ---
 lib/cpumask.c |  3 ++-
 mm/huge_memory.c  | 13 +++--
 mm/hugetlb.c  |  3 ++-
 mm/ksm.c  |  2 +-
 mm/memory.c   |  7 ---
 mm/memory_hotplug.c   | 12 ++--
 mm/mempolicy.c|  2 +-
 mm/page_alloc.c   |  4 ++--
 mm/page_ext.c |  2 +-
 net/core/pktgen.c |  3 ++-
 net/qrtr/qrtr.c   |  3 ++-
 39 files changed, 104 insertions(+), 74 deletions(-)

diff --git a/arch/alpha/include/asm/topology.h 
b/arch/alpha/include/asm/topology.h
index e6e13a8..5a77a40 100644
--- a/arch/alpha/include/asm/topology.h
+++ b/arch/alpha/include/asm/topology.h
@@ -4,6 +4,7 @@
 
 #include 
 #include 
+#include 
 #include 
 
 #ifdef CONFIG_NUMA
@@ -29,7 +30,7 @@ static const struct cpumask *cpumask_of_node(int node)
 {
int cpu;
 
-   if (node == -1)
+   if (node == NUMA_NO_NODE)
return cpu_all_mask;
 
cpumask_clear(_to_cpumask_map[node]);
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index 92c3762..1315da6 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -74,7 +74,7 @@ void __init build_cpu_to_node_map(void)
cpumask_clear(_to_cpu_mask[node]);
 
for_each_possible_early_cpu(cpu) {
-   node = -1;
+   node = NUMA_NO_NODE;
for (i = 0; i < NR_CPUS; ++i)
if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
node = node_cpuid[i].nid;
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 8a96578..f9c3675 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -227,7 +227,7 @@ void __init setup_per_cpu_areas(void)
 * CPUs are put into groups according to node.  Walk cpu_map
 * and create new groups at node boundaries.
 */
-   prev_node = -1;
+   prev_node = NUMA_NO_NODE;
ai->nr_groups = 0;
for (unit = 0; unit < nr_units; unit++) {
cpu = cpu_map[unit];
@@ -435,7 +435,7 @@ static void __init *memory_less_node_alloc(int nid, 
unsigned long pernodesize)
 {
void *ptr = NULL;
u8 best = 0xff;
-   int bestnode = -1, node, anynode = 0;
+   int bestnode = NUMA_NO_NODE, node, anynode = 0;
 
for_each_online_node(node) {
if (node_isset(node, memory_less_mask))
@@ -447,7 +447,7 @@ static void __init *memory_less_node_

[PATCH V3 0/2] Replace all open encodings for NUMA_NO_NODE

2018-12-18 Thread Anshuman Khandual
Changes in V3:

- Dropped all references to NUMA_NO_NODE as per Lubomir Rintel
- Split the patch into two creating a new one specifically for tools
- Folded Stephen's linux-next build fix into the second patch

Changes in V2: (https://patchwork.kernel.org/patch/10698089/)

- Added inclusion of 'numa.h' header at various places per Andrew
- Updated 'dev_to_node' to use NUMA_NO_NODE instead per Vinod

Changes in V1: (https://lkml.org/lkml/2018/11/23/485)

- Dropped OCFS2 changes per Joseph
- Dropped media/video drivers changes per Hans

RFC - https://patchwork.kernel.org/patch/10678035/

Build tested this with multiple cross compiler options like alpha, sparc,
arm64, x86, powerpc, powerpc64le etc with their default config, which might
not have compile tested all driver related changes. I will appreciate
folks giving this a test in their respective build environments.

All these places for replacement were found by running the following grep
patterns on the entire kernel code. Please let me know if this might have
missed some instances. This might also have replaced some false positives.
I will appreciate suggestions, inputs and review.

1. git grep "nid == -1"
2. git grep "node == -1"
3. git grep "nid = -1"
4. git grep "node = -1"

NOTE: I can still split the first patch into multiple ones - one for each
subsystem as suggested by Lubomir if that would be better.

Anshuman Khandual (1):
  mm: Replace all open encodings for NUMA_NO_NODE

Stephen Rothwell (1):
  Tools: Replace open encodings for NUMA_NO_NODE

 arch/alpha/include/asm/topology.h |  3 ++-
 arch/ia64/kernel/numa.c   |  2 +-
 arch/ia64/mm/discontig.c  |  6 +++---
 arch/powerpc/include/asm/pci-bridge.h |  3 ++-
 arch/powerpc/kernel/paca.c|  3 ++-
 arch/powerpc/kernel/pci-common.c  |  3 ++-
 arch/powerpc/mm/numa.c| 14 +++---
 arch/powerpc/platforms/powernv/memtrace.c |  5 +++--
 arch/sparc/kernel/pci_fire.c  |  3 ++-
 arch/sparc/kernel/pci_schizo.c|  3 ++-
 arch/sparc/kernel/psycho_common.c |  3 ++-
 arch/sparc/kernel/sbus.c  |  3 ++-
 arch/sparc/mm/init_64.c   |  6 +++---
 arch/x86/include/asm/pci.h|  3 ++-
 arch/x86/kernel/apic/x2apic_uv_x.c|  7 ---
 arch/x86/kernel/smpboot.c |  3 ++-
 drivers/block/mtip32xx/mtip32xx.c |  5 +++--
 drivers/dma/dmaengine.c   |  4 +++-
 drivers/infiniband/hw/hfi1/affinity.c |  3 ++-
 drivers/infiniband/hw/hfi1/init.c |  3 ++-
 drivers/iommu/dmar.c  |  5 +++--
 drivers/iommu/intel-iommu.c   |  3 ++-
 drivers/misc/sgi-xp/xpc_uv.c  |  3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  5 +++--
 include/linux/device.h|  2 +-
 init/init_task.c  |  3 ++-
 kernel/kthread.c  |  3 ++-
 kernel/sched/fair.c   | 15 ---
 lib/cpumask.c |  3 ++-
 mm/huge_memory.c  | 13 +++--
 mm/hugetlb.c  |  3 ++-
 mm/ksm.c  |  2 +-
 mm/memory.c   |  7 ---
 mm/memory_hotplug.c   | 12 ++--
 mm/mempolicy.c|  2 +-
 mm/page_alloc.c   |  4 ++--
 mm/page_ext.c |  2 +-
 net/core/pktgen.c |  3 ++-
 net/qrtr/qrtr.c   |  3 ++-
 tools/include/linux/numa.h| 16 
 tools/perf/bench/numa.c   |  6 +++---
 41 files changed, 123 insertions(+), 77 deletions(-)
 create mode 100644 tools/include/linux/numa.h

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH V3 2/2] Tools: Replace open encodings for NUMA_NO_NODE

2018-12-18 Thread Anshuman Khandual
From: Stephen Rothwell 

This replaces all open encodings in tools with NUMA_NO_NODE.
Also linux/numa.h is now needed for the perf build.

Signed-off-by: Anshuman Khandual 
Signed-off-by: Stephen Rothwell 
---
 tools/include/linux/numa.h | 16 
 tools/perf/bench/numa.c|  6 +++---
 2 files changed, 19 insertions(+), 3 deletions(-)
 create mode 100644 tools/include/linux/numa.h

diff --git a/tools/include/linux/numa.h b/tools/include/linux/numa.h
new file mode 100644
index 000..110b0e5
--- /dev/null
+++ b/tools/include/linux/numa.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NUMA_H
+#define _LINUX_NUMA_H
+
+
+#ifdef CONFIG_NODES_SHIFT
+#define NODES_SHIFT CONFIG_NODES_SHIFT
+#else
+#define NODES_SHIFT 0
+#endif
+
+#define MAX_NUMNODES    (1 << NODES_SHIFT)
+
+#define NUMA_NO_NODE    (-1)
+
+#endif /* _LINUX_NUMA_H */
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 4419551..e0ad5f1 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -298,7 +298,7 @@ static cpu_set_t bind_to_node(int target_node)
 
CPU_ZERO();
 
-   if (target_node == -1) {
+   if (target_node == NUMA_NO_NODE) {
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, );
} else {
@@ -339,7 +339,7 @@ static void bind_to_memnode(int node)
unsigned long nodemask;
int ret;
 
-   if (node == -1)
+   if (node == NUMA_NO_NODE)
return;
 
BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
@@ -1363,7 +1363,7 @@ static void init_thread_data(void)
int cpu;
 
/* Allow all nodes by default: */
-   td->bind_node = -1;
+   td->bind_node = NUMA_NO_NODE;
 
/* Allow all CPUs by default: */
CPU_ZERO(&td->bind_cpumask);
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH V2] mm: Replace all open encodings for NUMA_NO_NODE

2018-12-05 Thread Anshuman Khandual



On 12/05/2018 02:56 AM, Lubomir Rintel wrote:
> On Mon, 2018-11-26 at 17:56 +0530, Anshuman Khandual wrote:
>> At present there are multiple places where invalid node number is encoded
>> as -1. Even though implicitly understood it is always better to have macros
>> in there. Replace these open encodings for an invalid node number with the
>> global macro NUMA_NO_NODE. This helps remove NUMA related assumptions like
>> 'invalid node' from various places redirecting them to a common definition.
>>
>> Signed-off-by: Anshuman Khandual 
>> ---
>> Changes in V2:
>>
>> - Added inclusion of 'numa.h' header at various places per Andrew
>> - Updated 'dev_to_node' to use NUMA_NO_NODE instead per Vinod
>>
>> Changes in V1: (https://lkml.org/lkml/2018/11/23/485)
>>
>> - Dropped OCFS2 changes per Joseph
>> - Dropped media/video drivers changes per Hans
>>
>> RFC - https://patchwork.kernel.org/patch/10678035/
>>
>> Build tested this with multiple cross compiler options like alpha, sparc,
>> arm64, x86, powerpc, powerpc64le etc with their default config which might
>> not have compile-tested all driver related changes. I will appreciate
>> folks giving this a test in their respective build environment.
>>
>> All these places for replacement were found by running the following grep
>> patterns on the entire kernel code. Please let me know if this might have
>> missed some instances. This might also have replaced some false positives.
>> I will appreciate suggestions, inputs and review.
>>
>> 1. git grep "nid == -1"
>> 2. git grep "node == -1"
>> 3. git grep "nid = -1"
>> 4. git grep "node = -1"
>>
>>  arch/alpha/include/asm/topology.h |  3 ++-
>>  arch/ia64/kernel/numa.c   |  2 +-
>>  arch/ia64/mm/discontig.c  |  6 +++---
>>  arch/ia64/sn/kernel/io_common.c   |  3 ++-
>>  arch/powerpc/include/asm/pci-bridge.h |  3 ++-
>>  arch/powerpc/kernel/paca.c|  3 ++-
>>  arch/powerpc/kernel/pci-common.c  |  3 ++-
>>  arch/powerpc/mm/numa.c| 14 +++---
>>  arch/powerpc/platforms/powernv/memtrace.c |  5 +++--
>>  arch/sparc/kernel/auxio_32.c  |  3 ++-
>>  arch/sparc/kernel/pci_fire.c  |  3 ++-
>>  arch/sparc/kernel/pci_schizo.c|  3 ++-
>>  arch/sparc/kernel/pcic.c  |  7 ---
>>  arch/sparc/kernel/psycho_common.c |  3 ++-
>>  arch/sparc/kernel/sbus.c  |  3 ++-
>>  arch/sparc/mm/init_64.c   |  6 +++---
>>  arch/sparc/prom/init_32.c |  3 ++-
>>  arch/sparc/prom/init_64.c |  5 +++--
>>  arch/sparc/prom/tree_32.c | 13 +++--
>>  arch/sparc/prom/tree_64.c | 19 ++-
>>  arch/x86/include/asm/pci.h|  3 ++-
>>  arch/x86/kernel/apic/x2apic_uv_x.c|  7 ---
>>  arch/x86/kernel/smpboot.c |  3 ++-
>>  arch/x86/platform/olpc/olpc_dt.c  | 17 +
>>  drivers/block/mtip32xx/mtip32xx.c |  5 +++--
>>  drivers/dma/dmaengine.c   |  4 +++-
>>  drivers/infiniband/hw/hfi1/affinity.c |  3 ++-
>>  drivers/infiniband/hw/hfi1/init.c |  3 ++-
>>  drivers/iommu/dmar.c  |  5 +++--
>>  drivers/iommu/intel-iommu.c   |  3 ++-
>>  drivers/misc/sgi-xp/xpc_uv.c  |  3 ++-
>>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  5 +++--
>>  include/linux/device.h|  2 +-
>>  init/init_task.c  |  3 ++-
>>  kernel/kthread.c  |  3 ++-
>>  kernel/sched/fair.c   | 15 ---
>>  lib/cpumask.c |  3 ++-
>>  mm/huge_memory.c  | 13 +++--
>>  mm/hugetlb.c  |  3 ++-
>>  mm/ksm.c  |  2 +-
>>  mm/memory.c   |  7 ---
>>  mm/memory_hotplug.c   | 12 ++--
>>  mm/mempolicy.c|  2 +-
>>  mm/page_alloc.c   |  4 ++--
>>  mm/page_ext.c |  2 +-
>>  net/core/pktgen.c |  3 ++-
>>  net/qrtr/qrtr.c

Re: [LKP] [mm] 19717e78a0: stderr.if(target_node==NUMA_NO_NODE){

2018-12-05 Thread Anshuman Khandual
On 12/05/2018 10:30 AM, kernel test robot wrote:
> FYI, we noticed the following commit (built with gcc-7):
> 
> commit: 19717e78a04d51512cf0e7b9b09c61f06b2af071 ("[PATCH V2] mm: Replace all 
> open encodings for NUMA_NO_NODE")
> url: 
> https://github.com/0day-ci/linux/commits/Anshuman-Khandual/mm-Replace-all-open-encodings-for-NUMA_NO_NODE/20181126-203831
> 
> 
> in testcase: perf-sanity-tests
> with following parameters:
> 
>   perf_compiler: gcc
>   ucode: 0x713
> 
> 
> 
> on test machine: 16 threads Intel(R) Xeon(R) CPU D-1541 @ 2.10GHz with 8G 
> memory
> 
> caused below changes (please refer to attached dmesg/kmsg for entire 
> log/backtrace):

The fix (in Andrew's staging tree) from Stephen Rothwell, which adds the
missing definitions to tools/include/linux/numa.h, should fix this.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH V2] mm: Replace all open encodings for NUMA_NO_NODE

2018-11-26 Thread Anshuman Khandual



On 11/26/2018 06:18 PM, David Hildenbrand wrote:
> On 26.11.18 13:26, Anshuman Khandual wrote:
>> At present there are multiple places where invalid node number is encoded
>> as -1. Even though implicitly understood it is always better to have macros
>> in there. Replace these open encodings for an invalid node number with the
>> global macro NUMA_NO_NODE. This helps remove NUMA related assumptions like
>> 'invalid node' from various places redirecting them to a common definition.
>>
>> Signed-off-by: Anshuman Khandual 
>> ---
>> Changes in V2:
>>
>> - Added inclusion of 'numa.h' header at various places per Andrew
>> - Updated 'dev_to_node' to use NUMA_NO_NODE instead per Vinod
> 
> Reviewed-by: David Hildenbrand 

Thanks David. My bad, forgot to add your review tag from the earlier version.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH V2] mm: Replace all open encodings for NUMA_NO_NODE

2018-11-26 Thread Anshuman Khandual
At present there are multiple places where invalid node number is encoded
as -1. Even though implicitly understood it is always better to have macros
in there. Replace these open encodings for an invalid node number with the
global macro NUMA_NO_NODE. This helps remove NUMA related assumptions like
'invalid node' from various places redirecting them to a common definition.

Signed-off-by: Anshuman Khandual 
---
Changes in V2:

- Added inclusion of 'numa.h' header at various places per Andrew
- Updated 'dev_to_node' to use NUMA_NO_NODE instead per Vinod

Changes in V1: (https://lkml.org/lkml/2018/11/23/485)

- Dropped OCFS2 changes per Joseph
- Dropped media/video drivers changes per Hans

RFC - https://patchwork.kernel.org/patch/10678035/

Build tested this with multiple cross compiler options like alpha, sparc,
arm64, x86, powerpc, powerpc64le etc with their default config which might
not have compile-tested all driver related changes. I will appreciate
folks giving this a test in their respective build environment.

All these places for replacement were found by running the following grep
patterns on the entire kernel code. Please let me know if this might have
missed some instances. This might also have replaced some false positives.
I will appreciate suggestions, inputs and review.

1. git grep "nid == -1"
2. git grep "node == -1"
3. git grep "nid = -1"
4. git grep "node = -1"

 arch/alpha/include/asm/topology.h |  3 ++-
 arch/ia64/kernel/numa.c   |  2 +-
 arch/ia64/mm/discontig.c  |  6 +++---
 arch/ia64/sn/kernel/io_common.c   |  3 ++-
 arch/powerpc/include/asm/pci-bridge.h |  3 ++-
 arch/powerpc/kernel/paca.c|  3 ++-
 arch/powerpc/kernel/pci-common.c  |  3 ++-
 arch/powerpc/mm/numa.c| 14 +++---
 arch/powerpc/platforms/powernv/memtrace.c |  5 +++--
 arch/sparc/kernel/auxio_32.c  |  3 ++-
 arch/sparc/kernel/pci_fire.c  |  3 ++-
 arch/sparc/kernel/pci_schizo.c|  3 ++-
 arch/sparc/kernel/pcic.c  |  7 ---
 arch/sparc/kernel/psycho_common.c |  3 ++-
 arch/sparc/kernel/sbus.c  |  3 ++-
 arch/sparc/mm/init_64.c   |  6 +++---
 arch/sparc/prom/init_32.c |  3 ++-
 arch/sparc/prom/init_64.c |  5 +++--
 arch/sparc/prom/tree_32.c | 13 +++--
 arch/sparc/prom/tree_64.c | 19 ++-
 arch/x86/include/asm/pci.h|  3 ++-
 arch/x86/kernel/apic/x2apic_uv_x.c|  7 ---
 arch/x86/kernel/smpboot.c |  3 ++-
 arch/x86/platform/olpc/olpc_dt.c  | 17 +
 drivers/block/mtip32xx/mtip32xx.c |  5 +++--
 drivers/dma/dmaengine.c   |  4 +++-
 drivers/infiniband/hw/hfi1/affinity.c |  3 ++-
 drivers/infiniband/hw/hfi1/init.c |  3 ++-
 drivers/iommu/dmar.c  |  5 +++--
 drivers/iommu/intel-iommu.c   |  3 ++-
 drivers/misc/sgi-xp/xpc_uv.c  |  3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  5 +++--
 include/linux/device.h|  2 +-
 init/init_task.c  |  3 ++-
 kernel/kthread.c  |  3 ++-
 kernel/sched/fair.c   | 15 ---
 lib/cpumask.c |  3 ++-
 mm/huge_memory.c  | 13 +++--
 mm/hugetlb.c  |  3 ++-
 mm/ksm.c  |  2 +-
 mm/memory.c   |  7 ---
 mm/memory_hotplug.c   | 12 ++--
 mm/mempolicy.c|  2 +-
 mm/page_alloc.c   |  4 ++--
 mm/page_ext.c |  2 +-
 net/core/pktgen.c |  3 ++-
 net/qrtr/qrtr.c   |  3 ++-
 tools/perf/bench/numa.c   |  6 +++---
 48 files changed, 146 insertions(+), 108 deletions(-)

diff --git a/arch/alpha/include/asm/topology.h 
b/arch/alpha/include/asm/topology.h
index e6e13a8..5a77a40 100644
--- a/arch/alpha/include/asm/topology.h
+++ b/arch/alpha/include/asm/topology.h
@@ -4,6 +4,7 @@
 
 #include 
 #include 
+#include <linux/numa.h>
 #include 
 
 #ifdef CONFIG_NUMA
@@ -29,7 +30,7 @@ static const struct cpumask *cpumask_of_node(int node)
 {
int cpu;
 
-   if (node == -1)
+   if (node == NUMA_NO_NODE)
return cpu_all_mask;
 
cpumask_clear(_to_cpumask_map[node]);
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index 92c3762..1315da6 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -74,7 +74,7 @

Re: [PATCH] mm: Replace all open encodings for NUMA_NO_NODE

2018-11-25 Thread Anshuman Khandual



On 11/24/2018 07:35 PM, Vinod Koul wrote:
> On 23-11-18, 15:24, Anshuman Khandual wrote:
> 
>> --- a/drivers/dma/dmaengine.c
>> +++ b/drivers/dma/dmaengine.c
>> @@ -386,7 +386,8 @@ EXPORT_SYMBOL(dma_issue_pending_all);
>>  static bool dma_chan_is_local(struct dma_chan *chan, int cpu)
>>  {
>>  int node = dev_to_node(chan->device->dev);
>> -return node == -1 || cpumask_test_cpu(cpu, cpumask_of_node(node));
>> +return node == NUMA_NO_NODE ||
>> +cpumask_test_cpu(cpu, cpumask_of_node(node));
>>  }
> 
> I do not see dev_to_node being updated first, that returns -1 so I would
> prefer to check for -1 unless it returns NUMA_NO_NODE

Sure, will update dev_to_node() to return NUMA_NO_NODE as well.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] mm: Replace all open encodings for NUMA_NO_NODE

2018-11-24 Thread Anshuman Khandual



On 11/24/2018 05:14 AM, Andrew Morton wrote:
> On Fri, 23 Nov 2018 15:24:16 +0530 Anshuman Khandual 
>  wrote:
> 
>> At present there are multiple places where invalid node number is encoded
>> as -1. Even though implicitly understood it is always better to have macros
>> in there. Replace these open encodings for an invalid node number with the
>> global macro NUMA_NO_NODE. This helps remove NUMA related assumptions like
>> 'invalid node' from various places redirecting them to a common definition.
>>
>> ...
>>
>> Build tested this with multiple cross compiler options like alpha, sparc,
>> arm64, x86, powerpc, powerpc64le etc with their default config which might
>> not have compile-tested all driver related changes. I will appreciate
>> folks giving this a test in their respective build environment.
>>
>> All these places for replacement were found by running the following grep
>> patterns on the entire kernel code. Please let me know if this might have
>> missed some instances. This might also have replaced some false positives.
>> I will appreciate suggestions, inputs and review.
>>
>> 1. git grep "nid == -1"
>> 2. git grep "node == -1"
>> 3. git grep "nid = -1"
>> 4. git grep "node = -1"
> 
> The build testing is good, but I worry that some of the affected files
> don't clearly have numa.h in their include paths, for the NUMA_NO_NODE
> definition.
> 
> The first thing I looked at is arch/powerpc/include/asm/pci-bridge.h.
> Maybe it somehow manages to include numa.h via some nested include, but
> if so, is that reliable across all config combinations and as code
> evolves?
> 
> So I think that the patch should have added an explicit include of
> numa.h, especially in cases where the affected file previously had no
> references to any of the things which numa.h defines.

Fair enough. Will include numa.h in those particular files.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] mm: Replace all open encodings for NUMA_NO_NODE

2018-11-23 Thread Anshuman Khandual
min

On 11/23/2018 04:06 PM, David Hildenbrand wrote:
> On 23.11.18 10:54, Anshuman Khandual wrote:
>> At present there are multiple places where invalid node number is encoded
>> as -1. Even though implicitly understood it is always better to have macros
>> in there. Replace these open encodings for an invalid node number with the
>> global macro NUMA_NO_NODE. This helps remove NUMA related assumptions like
>> 'invalid node' from various places redirecting them to a common definition.
>>
>> Signed-off-by: Anshuman Khandual 
>> ---
>>
>> Changes in V1:
>>
>> - Dropped OCFS2 changes per Joseph
>> - Dropped media/video drivers changes per Hans
>>
>> RFC - https://patchwork.kernel.org/patch/10678035/
>>
>> Build tested this with multiple cross compiler options like alpha, sparc,
>> arm64, x86, powerpc, powerpc64le etc with their default config which might
>> not have compile-tested all driver related changes. I will appreciate
>> folks giving this a test in their respective build environment.
>>
>> All these places for replacement were found by running the following grep
>> patterns on the entire kernel code. Please let me know if this might have
>> missed some instances. This might also have replaced some false positives.
>> I will appreciate suggestions, inputs and review.
>>
>> 1. git grep "nid == -1"
>> 2. git grep "node == -1"
>> 3. git grep "nid = -1"
>> 4. git grep "node = -1"
> 
> Hopefully you found most users :)

I hope so :)

> 
> Did you check if some are encoded into function calls? f(-1, ...)

Not really. Just wondering how do we even search for it. There might be
higher level functions passing down -1 to core MM. If you have some
instances in mind which need replacement I will accommodate them.

> 
> Reviewed-by: David Hildenbrand 

Thanks for the review.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] mm: Replace all open encodings for NUMA_NO_NODE

2018-11-23 Thread Anshuman Khandual
At present there are multiple places where invalid node number is encoded
as -1. Even though implicitly understood it is always better to have macros
in there. Replace these open encodings for an invalid node number with the
global macro NUMA_NO_NODE. This helps remove NUMA related assumptions like
'invalid node' from various places redirecting them to a common definition.

Signed-off-by: Anshuman Khandual 
---

Changes in V1:

- Dropped OCFS2 changes per Joseph
- Dropped media/video drivers changes per Hans

RFC - https://patchwork.kernel.org/patch/10678035/

Build tested this with multiple cross compiler options like alpha, sparc,
arm64, x86, powerpc, powerpc64le etc with their default config which might
not have compile-tested all driver related changes. I will appreciate
folks giving this a test in their respective build environment.

All these places for replacement were found by running the following grep
patterns on the entire kernel code. Please let me know if this might have
missed some instances. This might also have replaced some false positives.
I will appreciate suggestions, inputs and review.

1. git grep "nid == -1"
2. git grep "node == -1"
3. git grep "nid = -1"
4. git grep "node = -1"

 arch/alpha/include/asm/topology.h |  2 +-
 arch/ia64/kernel/numa.c   |  2 +-
 arch/ia64/mm/discontig.c  |  6 +++---
 arch/ia64/sn/kernel/io_common.c   |  2 +-
 arch/powerpc/include/asm/pci-bridge.h |  2 +-
 arch/powerpc/kernel/paca.c|  2 +-
 arch/powerpc/kernel/pci-common.c  |  2 +-
 arch/powerpc/mm/numa.c| 14 +++---
 arch/powerpc/platforms/powernv/memtrace.c |  4 ++--
 arch/sparc/kernel/auxio_32.c  |  2 +-
 arch/sparc/kernel/pci_fire.c  |  2 +-
 arch/sparc/kernel/pci_schizo.c|  2 +-
 arch/sparc/kernel/pcic.c  |  6 +++---
 arch/sparc/kernel/psycho_common.c |  2 +-
 arch/sparc/kernel/sbus.c  |  2 +-
 arch/sparc/mm/init_64.c   |  6 +++---
 arch/sparc/prom/init_32.c |  2 +-
 arch/sparc/prom/init_64.c |  4 ++--
 arch/sparc/prom/tree_32.c | 12 ++--
 arch/sparc/prom/tree_64.c | 18 +-
 arch/x86/include/asm/pci.h|  2 +-
 arch/x86/kernel/apic/x2apic_uv_x.c|  6 +++---
 arch/x86/kernel/smpboot.c |  2 +-
 arch/x86/platform/olpc/olpc_dt.c  | 16 
 drivers/block/mtip32xx/mtip32xx.c |  4 ++--
 drivers/dma/dmaengine.c   |  3 ++-
 drivers/infiniband/hw/hfi1/affinity.c |  2 +-
 drivers/infiniband/hw/hfi1/init.c |  2 +-
 drivers/iommu/dmar.c  |  4 ++--
 drivers/iommu/intel-iommu.c   |  2 +-
 drivers/misc/sgi-xp/xpc_uv.c  |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  4 ++--
 init/init_task.c  |  2 +-
 kernel/kthread.c  |  2 +-
 kernel/sched/fair.c   | 15 ---
 lib/cpumask.c |  2 +-
 mm/huge_memory.c  | 12 ++--
 mm/hugetlb.c  |  2 +-
 mm/ksm.c  |  2 +-
 mm/memory.c   |  6 +++---
 mm/memory_hotplug.c   | 12 ++--
 mm/mempolicy.c|  2 +-
 mm/page_alloc.c   |  4 ++--
 mm/page_ext.c |  2 +-
 net/core/pktgen.c |  2 +-
 net/qrtr/qrtr.c   |  2 +-
 tools/perf/bench/numa.c   |  6 +++---
 47 files changed, 109 insertions(+), 107 deletions(-)

diff --git a/arch/alpha/include/asm/topology.h 
b/arch/alpha/include/asm/topology.h
index e6e13a8..f6dc89c 100644
--- a/arch/alpha/include/asm/topology.h
+++ b/arch/alpha/include/asm/topology.h
@@ -29,7 +29,7 @@ static const struct cpumask *cpumask_of_node(int node)
 {
int cpu;
 
-   if (node == -1)
+   if (node == NUMA_NO_NODE)
return cpu_all_mask;
 
cpumask_clear(_to_cpumask_map[node]);
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index 92c3762..1315da6 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -74,7 +74,7 @@ void __init build_cpu_to_node_map(void)
cpumask_clear(_to_cpu_mask[node]);
 
for_each_possible_early_cpu(cpu) {
-   node = -1;
+   node = NUMA_NO_NODE;
for (i = 0; i < NR_CPUS; ++i)
if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
node = n

Re: [RFC] mm: Replace all open encodings for NUMA_NO_NODE

2018-11-12 Thread Anshuman Khandual



On 11/12/2018 02:13 PM, Hans Verkuil wrote:
> On 11/12/2018 03:41 AM, Anshuman Khandual wrote:
>> At present there are multiple places where invalid node number is encoded
>> as -1. Even though implicitly understood it is always better to have macros
>> in there. Replace these open encodings for an invalid node number with the
>> global macro NUMA_NO_NODE. This helps remove NUMA related assumptions like
>> 'invalid node' from various places redirecting them to a common definition.
>>
>> Signed-off-by: Anshuman Khandual 
>> ---
>> Build tested this with multiple cross compiler options like alpha, sparc,
>> arm64, x86, powerpc64le etc with their default config which might not have
>> compile-tested all driver related changes. I will appreciate folks giving
>> this a test in their respective build environment.
>>
>> All these places for replacement were found by running the following grep
>> patterns on the entire kernel code. Please let me know if this might have
>> missed some instances. This might also have replaced some false positives.
>> I will appreciate suggestions, inputs and review.
> The 'node' in the drivers/media and the drivers/video sources has nothing to
> do with numa. It's an index for a framebuffer instead (i.e. the X in 
> /dev/fbX).

Thanks for the input. Will drop the changes there.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC] mm: Replace all open encodings for NUMA_NO_NODE

2018-11-11 Thread Anshuman Khandual



On 11/12/2018 09:40 AM, Anshuman Khandual wrote:
> 
> 
> On 11/12/2018 09:27 AM, Joseph Qi wrote:
>> For ocfs2 part, node means host in the cluster, not NUMA node.
>>
> 
> Does not -1 indicate an invalid node which can never be present ?
> 

My bad, I got it wrong. It seems this has nothing to do with NUMA nodes
at all. Will drop the changes from ocfs2.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC] mm: Replace all open encodings for NUMA_NO_NODE

2018-11-11 Thread Anshuman Khandual



On 11/12/2018 09:27 AM, Joseph Qi wrote:
> For ocfs2 part, node means host in the cluster, not NUMA node.
> 

Doesn't -1 indicate an invalid node which can never be present?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RFC] mm: Replace all open encodings for NUMA_NO_NODE

2018-11-11 Thread Anshuman Khandual
At present there are multiple places where invalid node number is encoded
as -1. Even though implicitly understood it is always better to have macros
in there. Replace these open encodings for an invalid node number with the
global macro NUMA_NO_NODE. This helps remove NUMA related assumptions like
'invalid node' from various places redirecting them to a common definition.

Signed-off-by: Anshuman Khandual 
---
Build tested this with multiple cross compiler options like alpha, sparc,
arm64, x86, powerpc64le etc with their default config which might not have
compile-tested all driver related changes. I will appreciate folks giving
this a test in their respective build environment.

All these places for replacement were found by running the following grep
patterns on the entire kernel code. Please let me know if this might have
missed some instances. This might also have replaced some false positives.
I will appreciate suggestions, inputs and review.

1. git grep "nid == -1"
2. git grep "node == -1"
3. git grep "nid = -1"
4. git grep "node = -1"

 arch/alpha/include/asm/topology.h |  2 +-
 arch/ia64/kernel/numa.c   |  2 +-
 arch/ia64/mm/discontig.c  |  6 +++---
 arch/ia64/sn/kernel/io_common.c   |  2 +-
 arch/powerpc/include/asm/pci-bridge.h |  2 +-
 arch/powerpc/kernel/paca.c|  2 +-
 arch/powerpc/kernel/pci-common.c  |  2 +-
 arch/powerpc/mm/numa.c| 14 +++---
 arch/powerpc/platforms/powernv/memtrace.c |  4 ++--
 arch/sparc/kernel/auxio_32.c  |  2 +-
 arch/sparc/kernel/pci_fire.c  |  2 +-
 arch/sparc/kernel/pci_schizo.c|  2 +-
 arch/sparc/kernel/pcic.c  |  6 +++---
 arch/sparc/kernel/psycho_common.c |  2 +-
 arch/sparc/kernel/sbus.c  |  2 +-
 arch/sparc/mm/init_64.c   |  6 +++---
 arch/sparc/prom/init_32.c |  2 +-
 arch/sparc/prom/init_64.c |  4 ++--
 arch/sparc/prom/tree_32.c | 12 ++--
 arch/sparc/prom/tree_64.c | 18 +-
 arch/x86/include/asm/pci.h|  2 +-
 arch/x86/kernel/apic/x2apic_uv_x.c|  6 +++---
 arch/x86/kernel/smpboot.c |  2 +-
 arch/x86/platform/olpc/olpc_dt.c  | 16 
 drivers/block/mtip32xx/mtip32xx.c |  4 ++--
 drivers/dma/dmaengine.c   |  3 ++-
 drivers/infiniband/hw/hfi1/affinity.c |  2 +-
 drivers/infiniband/hw/hfi1/init.c |  2 +-
 drivers/iommu/dmar.c  |  4 ++--
 drivers/iommu/intel-iommu.c   |  2 +-
 drivers/media/pci/ivtv/ivtvfb.c   |  2 +-
 drivers/media/platform/vivid/vivid-osd.c  |  2 +-
 drivers/misc/sgi-xp/xpc_uv.c  |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  4 ++--
 drivers/video/fbdev/mmp/fb/mmpfb.c|  2 +-
 drivers/video/fbdev/pxa168fb.c|  2 +-
 drivers/video/fbdev/w100fb.c  |  2 +-
 fs/ocfs2/dlm/dlmcommon.h  |  2 +-
 fs/ocfs2/dlm/dlmdomain.c  | 10 +-
 fs/ocfs2/dlm/dlmmaster.c  |  2 +-
 fs/ocfs2/dlm/dlmrecovery.c|  2 +-
 fs/ocfs2/stack_user.c |  6 +++---
 init/init_task.c  |  2 +-
 kernel/kthread.c  |  2 +-
 kernel/sched/fair.c   | 15 ---
 lib/cpumask.c |  2 +-
 mm/huge_memory.c  | 12 ++--
 mm/hugetlb.c  |  2 +-
 mm/ksm.c  |  2 +-
 mm/memory.c   |  6 +++---
 mm/memory_hotplug.c   | 12 ++--
 mm/mempolicy.c|  2 +-
 mm/page_alloc.c   |  4 ++--
 mm/page_ext.c |  2 +-
 net/core/pktgen.c |  2 +-
 net/qrtr/qrtr.c   |  2 +-
 tools/perf/bench/numa.c   |  6 +++---
 57 files changed, 125 insertions(+), 123 deletions(-)

diff --git a/arch/alpha/include/asm/topology.h 
b/arch/alpha/include/asm/topology.h
index e6e13a8..f6dc89c 100644
--- a/arch/alpha/include/asm/topology.h
+++ b/arch/alpha/include/asm/topology.h
@@ -29,7 +29,7 @@ static const struct cpumask *cpumask_of_node(int node)
 {
int cpu;
 
-   if (node == -1)
+   if (node == NUMA_NO_NODE)
return cpu_all_mask;
 
cpumask_clear(_to_cpumask_map[node]);
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index 92c3762..1315da6 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/i

Re: [PATCH 11/12] swiotlb: move the SWIOTLB config symbol to lib/Kconfig

2018-04-17 Thread Anshuman Khandual
On 04/15/2018 08:29 PM, Christoph Hellwig wrote:
> This way we have one central definition of it, and user can select it as
> needed.  Note that we also add a second ARCH_HAS_SWIOTLB symbol to
> indicate the architecture supports swiotlb at all, so that we can still
> make the usage optional for a few architectures that want this feature
> to be user selectable.
> 
> Signed-off-by: Christoph Hellwig 


[snip]

> +
> +config SWIOTLB
> + bool "SWIOTLB support"
> + default ARCH_HAS_SWIOTLB
> + select DMA_DIRECT_OPS
> + select NEED_DMA_MAP_STATE
> + select NEED_SG_DMA_LENGTH
> + ---help---
> +   Support for IO bounce buffering for systems without an IOMMU.
> +   This allows us to DMA to the full physical address space on
> +   platforms where the size of a physical address is larger
> +   than the bus address.  If unsure, say Y.
> +
>  config CHECK_SIGNATURE
>   bool

Pulling in the DMA_DIRECT_OPS config option by default when SWIOTLB is
enabled makes sense. Until now this option also had to be enabled
separately just to use swiotlb_dma_ops.

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 01/12] iommu-common: move to arch/sparc

2018-04-16 Thread Anshuman Khandual
On 04/16/2018 07:28 PM, David Miller wrote:
> From: Anshuman Khandual <khand...@linux.vnet.ibm.com>
> Date: Mon, 16 Apr 2018 14:26:07 +0530
> 
>> On 04/15/2018 08:29 PM, Christoph Hellwig wrote:
>>> This code is only used by sparc, and all new iommu drivers should use the
>>> drivers/iommu/ framework.  Also remove the unused exports.
>>>
>>> Signed-off-by: Christoph Hellwig <h...@lst.de>
>>
>> Right, these functions are used only from SPARC architecture. Simple
>> git grep confirms it as well. Hence it makes sense to move them into
>> arch code instead.
> 
> Well, we put these into a common location and used type friendly for
> powerpc because we hoped powerpc would convert over to using this
> common piece of code as well.
> 
> But nobody did the powerpc work.
> 
> If you look at the powerpc iommu support, it's the same code basically
> for entry allocation.

I understand. But there are some differences in the iommu_table structure,
how both regular and large IOMMU pools are being initialized etc. So
if moving the code into SPARC helps clean up these generic config
options in general, I guess we should do that. But I will leave it up to
others who have more experience in this area.

+mpe

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 06/12] dma-mapping: move the NEED_DMA_MAP_STATE config symbol to lib/Kconfig

2018-04-16 Thread Anshuman Khandual
On 04/15/2018 08:29 PM, Christoph Hellwig wrote:
> This way we have one central definition of it, and user can select it as
> needed.  Note that we now also always select it when CONFIG_DMA_API_DEBUG
> is selected, which fixes some incorrect checks in a few network drivers.
> 
> Signed-off-by: Christoph Hellwig <h...@lst.de>

Reviewed-by: Anshuman Khandual <khand...@linux.vnet.ibm.com>

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 05/12] scatterlist: move the NEED_SG_DMA_LENGTH config symbol to lib/Kconfig

2018-04-16 Thread Anshuman Khandual
On 04/15/2018 08:29 PM, Christoph Hellwig wrote:
> This way we have one central definition of it, and user can select it as
> needed.
> 
> Signed-off-by: Christoph Hellwig <h...@lst.de>

Reviewed-by: Anshuman Khandual <khand...@linux.vnet.ibm.com>

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 02/12] iommu-helper: unexport iommu_area_alloc

2018-04-16 Thread Anshuman Khandual
On 04/15/2018 08:29 PM, Christoph Hellwig wrote:
> This function is only used by built-in code.
> 
> Reviewed-by: Christoph Hellwig <h...@lst.de>

Reviewed-by: Anshuman Khandual <khand...@linux.vnet.ibm.com>

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 04/12] iommu-helper: move the IOMMU_HELPER config symbol to lib/

2018-04-16 Thread Anshuman Khandual
On 04/15/2018 08:29 PM, Christoph Hellwig wrote:
> This way we have one central definition of it, and user can select it as
> needed.
> 
> Signed-off-by: Christoph Hellwig <h...@lst.de>

Reviewed-by: Anshuman Khandual <khand...@linux.vnet.ibm.com>

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 01/12] iommu-common: move to arch/sparc

2018-04-16 Thread Anshuman Khandual
On 04/15/2018 08:29 PM, Christoph Hellwig wrote:
> This code is only used by sparc, and all new iommu drivers should use the
> drivers/iommu/ framework.  Also remove the unused exports.
> 
> Signed-off-by: Christoph Hellwig <h...@lst.de>

Right, these functions are used only by the SPARC architecture. A simple
git grep confirms it as well. Hence it makes sense to move them into
arch code instead.

git grep iommu_tbl_pool_init


arch/sparc/include/asm/iommu-common.h:extern void iommu_tbl_pool_init(struct 
iommu_map_table *iommu,
arch/sparc/kernel/iommu-common.c:void iommu_tbl_pool_init(struct 
iommu_map_table *iommu,
arch/sparc/kernel/iommu.c:  iommu_tbl_pool_init(>tbl, 
num_tsb_entries, IO_PAGE_SHIFT,
arch/sparc/kernel/ldc.c:iommu_tbl_pool_init(iommu, num_tsb_entries, 
PAGE_SHIFT,
arch/sparc/kernel/pci_sun4v.c:  iommu_tbl_pool_init(>tbl, num_iotte, 
IO_PAGE_SHIFT,
arch/sparc/kernel/pci_sun4v.c:  iommu_tbl_pool_init(>tbl, 
num_tsb_entries, IO_PAGE_SHIFT,

git grep iommu_tbl_range_alloc
--

arch/sparc/include/asm/iommu-common.h:extern unsigned long 
iommu_tbl_range_alloc(struct device *dev,
arch/sparc/kernel/iommu-common.c:unsigned long iommu_tbl_range_alloc(struct 
device *dev,
arch/sparc/kernel/iommu.c:  entry = iommu_tbl_range_alloc(dev, >tbl, 
npages, NULL,
arch/sparc/kernel/iommu.c:  entry = iommu_tbl_range_alloc(dev, 
>tbl, npages,
arch/sparc/kernel/ldc.c:entry = iommu_tbl_range_alloc(NULL, 
>iommu_map_table,
arch/sparc/kernel/pci_sun4v.c:  entry = iommu_tbl_range_alloc(dev, tbl, npages, 
NULL,
arch/sparc/kernel/pci_sun4v.c:  entry = iommu_tbl_range_alloc(dev, tbl, npages, 
NULL,
arch/sparc/kernel/pci_sun4v.c:  entry = iommu_tbl_range_alloc(dev, tbl, 
npages,

git grep iommu_tbl_range_free
-

arch/sparc/include/asm/iommu-common.h:extern void iommu_tbl_range_free(struct 
iommu_map_table *iommu,
arch/sparc/kernel/iommu-common.c:void iommu_tbl_range_free(struct 
iommu_map_table *iommu, u64 dma_addr,
arch/sparc/kernel/iommu.c:  iommu_tbl_range_free(>tbl, dvma, npages, 
IOMMU_ERROR_CODE);
arch/sparc/kernel/iommu.c:  iommu_tbl_range_free(>tbl, bus_addr, 
npages, IOMMU_ERROR_CODE);
arch/sparc/kernel/iommu.c:  
iommu_tbl_range_free(>tbl, vaddr, npages,
arch/sparc/kernel/iommu.c:  iommu_tbl_range_free(>tbl, 
dma_handle, npages,
arch/sparc/kernel/ldc.c:iommu_tbl_range_free(>iommu_map_table, 
cookie, npages, entry);
arch/sparc/kernel/pci_sun4v.c:  iommu_tbl_range_free(tbl, *dma_addrp, npages, 
IOMMU_ERROR_CODE);
arch/sparc/kernel/pci_sun4v.c:  iommu_tbl_range_free(tbl, dvma, npages, 
IOMMU_ERROR_CODE);
arch/sparc/kernel/pci_sun4v.c:  iommu_tbl_range_free(tbl, bus_addr, npages, 
IOMMU_ERROR_CODE);
arch/sparc/kernel/pci_sun4v.c:  iommu_tbl_range_free(tbl, bus_addr, npages, 
IOMMU_ERROR_CODE);
arch/sparc/kernel/pci_sun4v.c:  iommu_tbl_range_free(tbl, 
vaddr, npages,
arch/sparc/kernel/pci_sun4v.c:  iommu_tbl_range_free(tbl, dma_handle, 
npages,

Reviewed-by: Anshuman Khandual <khand...@linux.vnet.ibm.com>

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu