Re: [PATCH 07/15] x86: remove the IOMMU table infrastructure

2022-04-06 Thread Konrad Rzeszutek Wilk
On Mon, Apr 04, 2022 at 07:05:51AM +0200, Christoph Hellwig wrote:
> The IOMMU table tries to separate the different IOMMUs into different
> backends, but actually requires various cross calls.
> 
> Rewrite the code to do the generic swiotlb/swiotlb-xen setup directly
> in pci-dma.c and then just call into the IOMMU drivers.
> 
> Signed-off-by: Christoph Hellwig 

Hey Christoph,

There is a bit of background behind this - this whole IOMMU table
machinery was created because, at that point in time, pci_iommu_alloc
was getting way too unwieldy and there needed to be a more 'structured'
way to express dependencies.

Hence this creation... But as Christoph points out, it has gotten out
of hand, so smashing it back into a simpler mechanism is good.

Reviewed-by: Konrad Rzeszutek Wilk 

Thank you!
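
For context, the table mechanism being removed looked roughly like this
(reconstructed from the pci_iommu_alloc() loop quoted later in this
digest, so the field names are approximate):

	/*
	 * Each IOMMU backend registered one of these; the entries were
	 * sorted by dependency and pci_iommu_alloc() walked the table
	 * calling ->detect() and ->early_init().
	 */
	struct iommu_table_entry {
		initcall_t	detect;		/* returns > 0 if present */
		initcall_t	depend;		/* detect routine we depend on */
		void		(*early_init)(void);	/* no memory allocation */
		void		(*late_init)(void);	/* may allocate memory */
	#define IOMMU_FINISH_IF_DETECTED	(1 << 0)
	#define IOMMU_DETECTED			(1 << 1)
		int		flags;
	};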

> ---
>  arch/ia64/include/asm/iommu_table.h|   7 --
>  arch/x86/include/asm/dma-mapping.h |   1 -
>  arch/x86/include/asm/gart.h|   5 +-
>  arch/x86/include/asm/iommu.h   |   6 ++
>  arch/x86/include/asm/iommu_table.h | 102 ---
>  arch/x86/include/asm/swiotlb.h |  30 ---
>  arch/x86/include/asm/xen/swiotlb-xen.h |   2 -
>  arch/x86/kernel/Makefile   |   2 -
>  arch/x86/kernel/amd_gart_64.c  |   5 +-
>  arch/x86/kernel/aperture_64.c  |  14 ++--
>  arch/x86/kernel/pci-dma.c  | 107 -
>  arch/x86/kernel/pci-iommu_table.c  |  77 --
>  arch/x86/kernel/pci-swiotlb.c  |  77 --
>  arch/x86/kernel/tboot.c|   1 -
>  arch/x86/kernel/vmlinux.lds.S  |  12 ---
>  arch/x86/xen/Makefile  |   2 -
>  arch/x86/xen/pci-swiotlb-xen.c |  96 --
>  drivers/iommu/amd/init.c   |   6 --
>  drivers/iommu/amd/iommu.c  |   5 +-
>  drivers/iommu/intel/dmar.c |   6 +-
>  include/linux/dmar.h   |   6 +-
>  21 files changed, 110 insertions(+), 459 deletions(-)
>  delete mode 100644 arch/ia64/include/asm/iommu_table.h
>  delete mode 100644 arch/x86/include/asm/iommu_table.h
>  delete mode 100644 arch/x86/include/asm/swiotlb.h
>  delete mode 100644 arch/x86/kernel/pci-iommu_table.c
>  delete mode 100644 arch/x86/kernel/pci-swiotlb.c
>  delete mode 100644 arch/x86/xen/pci-swiotlb-xen.c
> 
> diff --git a/arch/ia64/include/asm/iommu_table.h 
> b/arch/ia64/include/asm/iommu_table.h
> deleted file mode 100644
> index cc96116ac276a..0
> --- a/arch/ia64/include/asm/iommu_table.h
> +++ /dev/null
> @@ -1,7 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0 */
> -#ifndef _ASM_IA64_IOMMU_TABLE_H
> -#define _ASM_IA64_IOMMU_TABLE_H
> -
> -#define IOMMU_INIT_POST(_detect)
> -
> -#endif /* _ASM_IA64_IOMMU_TABLE_H */
> diff --git a/arch/x86/include/asm/dma-mapping.h 
> b/arch/x86/include/asm/dma-mapping.h
> index bb1654fe0ce74..256fd8115223d 100644
> --- a/arch/x86/include/asm/dma-mapping.h
> +++ b/arch/x86/include/asm/dma-mapping.h
> @@ -9,7 +9,6 @@
>  
>  #include 
>  #include 
> -#include 
>  
>  extern int iommu_merge;
>  extern int panic_on_overflow;
> diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
> index 3185565743459..5af8088a10df6 100644
> --- a/arch/x86/include/asm/gart.h
> +++ b/arch/x86/include/asm/gart.h
> @@ -38,7 +38,7 @@ extern int gart_iommu_aperture_disabled;
>  extern void early_gart_iommu_check(void);
>  extern int gart_iommu_init(void);
>  extern void __init gart_parse_options(char *);
> -extern int gart_iommu_hole_init(void);
> +void gart_iommu_hole_init(void);
>  
>  #else
>  #define gart_iommu_aperture 0
> @@ -51,9 +51,8 @@ static inline void early_gart_iommu_check(void)
>  static inline void gart_parse_options(char *options)
>  {
>  }
> -static inline int gart_iommu_hole_init(void)
> +static inline void gart_iommu_hole_init(void)
>  {
> - return -ENODEV;
>  }
>  #endif
>  
> diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
> index bf1ed2ddc74bd..dba89ed40d38d 100644
> --- a/arch/x86/include/asm/iommu.h
> +++ b/arch/x86/include/asm/iommu.h
> @@ -9,6 +9,12 @@
>  extern int force_iommu, no_iommu;
>  extern int iommu_detected;
>  
> +#ifdef CONFIG_SWIOTLB
> +extern bool x86_swiotlb_enable;
> +#else
> +#define x86_swiotlb_enable false
> +#endif
> +
>  /* 10 seconds */
>  #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
>  
> diff --git a/arch/x86/include/asm/iommu_table.h 
> b/arch/x86/include/asm/iommu_table.h
> deleted file mode 100644
> index 1fb3fd1a83c25..0
> --- a/arch/x86/include/asm/iommu_table.h
> +++ /de

Re: cleanup swiotlb initialization v8

2022-04-06 Thread Konrad Rzeszutek Wilk
On Mon, Apr 04, 2022 at 07:05:44AM +0200, Christoph Hellwig wrote:
> Hi all,
> 
> this series tries to clean up the swiotlb initialization, including
> that of swiotlb-xen.  To get there it also removes the x86 IOMMU table
> infrastructure that massively obfuscates the initialization path.
> 
> Git tree:
> 
> git://git.infradead.org/users/hch/misc.git swiotlb-init-cleanup
> 
> Gitweb:
> 
> 
> http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/swiotlb-init-cleanup
> 
> Changes since v7:
>  - rebased to Linux 5.18-rc1
>  - better document the lower bound swiotlb size for xen-swiotlb
>  - improve the nslabs calculation for the retry case in
>swiotlb_init_remap and swiotlb_init_late

Hey Christoph,

Feel free to tack on

Reviewed-by: Konrad Rzeszutek Wilk 

on them if you would like.

Thank you for doing the spring cleaning of this codebase!
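
(For reference, a rough sketch of the improved nslabs retry calculation
mentioned in the changelog above, as I read the series - names and
details approximate, not a verbatim quote of the patches:)

	retry:
		bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
		tlb = memblock_alloc(bytes, PAGE_SIZE);
		if (!tlb)
			goto fail;
		if (remap && remap(tlb, nslabs) < 0) {
			/* Halve nslabs, keeping segment alignment and a floor. */
			memblock_free(tlb, PAGE_ALIGN(bytes));
			nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
			if (nslabs < IO_TLB_MIN_SLABS)
				panic("%s: Failed to remap %zu bytes\n",
				      __func__, bytes);
			goto retry;
		}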


Re: [PATCH 10/15] swiotlb: add a SWIOTLB_ANY flag to lift the low memory restriction

2022-04-06 Thread Konrad Rzeszutek Wilk
> diff --git a/arch/powerpc/platforms/pseries/svm.c 
> b/arch/powerpc/platforms/pseries/svm.c
> index c5228f4969eb2..3b4045d508ec8 100644
> --- a/arch/powerpc/platforms/pseries/svm.c
> +++ b/arch/powerpc/platforms/pseries/svm.c
> @@ -28,7 +28,7 @@ static int __init init_svm(void)
>* need to use the SWIOTLB buffer for DMA even if dma_capable() says
>* otherwise.
>*/
> - swiotlb_force = SWIOTLB_FORCE;
> + ppc_swiotlb_flags |= SWIOTLB_ANY | SWIOTLB_FORCE;

This is the only place you set ppc_swiotlb_flags, so I wonder why
the '|=' instead of just '='?
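
(A tiny illustration of the difference, assuming - hypothetically - that
some other early-boot path also set a flag:)

	ppc_swiotlb_flags = SWIOTLB_VERBOSE;		/* hypothetically, elsewhere */

	ppc_swiotlb_flags |= SWIOTLB_ANY | SWIOTLB_FORCE; /* keeps SWIOTLB_VERBOSE */
	ppc_swiotlb_flags  = SWIOTLB_ANY | SWIOTLB_FORCE; /* would discard it */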

Either way:
Reviewed-by: Konrad Rzeszutek Wilk 

Thank you!


Re: [PATCH 07/11] x86: remove the IOMMU table infrastructure

2022-03-01 Thread Konrad Rzeszutek Wilk
> -#include 
> -
> -/*
> - * History lesson:
> - * The execution chain of IOMMUs in 2.6.36 looks as so:
> - *
> - *            [xen-swiotlb]
> - *                 |
> - *         +----[swiotlb *]--+
> - *        /         |         \
> - *       /          |          \
> - *    [GART]    [Calgary]   [Intel VT-d]
> - *     /
> - *    /
> - * [AMD-Vi]

.. snip..
> - *
>  void __init pci_iommu_alloc(void)
>  {
> - struct iommu_table_entry *p;
> -
> - sort_iommu_table(__iommu_table, __iommu_table_end);
> - check_iommu_entries(__iommu_table, __iommu_table_end);
> -
> - for (p = __iommu_table; p < __iommu_table_end; p++) {
> - if (p && p->detect && p->detect() > 0) {
> - p->flags |= IOMMU_DETECTED;
> - if (p->early_init)
> - p->early_init();
> - if (p->flags & IOMMU_FINISH_IF_DETECTED)
> - break;
> - }
> + if (xen_pv_domain()) {
> + pci_xen_swiotlb_init();
> + return;
>   }
> + pci_swiotlb_detect_4gb();

I think you also need to check for IBM Calgary?

> + gart_iommu_hole_init();
> + amd_iommu_detect();
> + detect_intel_iommu();
> + if (x86_swiotlb_enable)
> + swiotlb_init(0);


Re: [powerpc][next-20210727] Boot failure - kernel BUG at arch/powerpc/kernel/interrupt.c:98!

2021-07-29 Thread Konrad Rzeszutek Wilk
On Thu, Jul 29, 2021 at 05:13:36PM +0100, Will Deacon wrote:
> On Wed, Jul 28, 2021 at 10:35:34AM -0700, Nathan Chancellor wrote:
> > On Wed, Jul 28, 2021 at 01:31:06PM +0530, Sachin Sant wrote:
> > > next-20210723 was good. The boot failure seems to have been introduced 
> > > with next-20210726.
> > > 
> > > I have attached the boot log.
> > 
> > I noticed this with OpenSUSE's ppc64le config [1] and my bisect landed on
> > commit ad6c00283163 ("swiotlb: Free tbl memory in swiotlb_exit()"). That
> > series just keeps on giving...

Low-level cross-platform changes do that. Thank you for testing it and
finding this bug. Please let me know if the patch works so I can add it
to the patch series.
> 
> Yes, but look how handy our new print is!

:)
> 
> [0.010799] software IO TLB: tearing down default memory pool
> [0.010805] [ cut here ]
> [0.010808] kernel BUG at arch/powerpc/kernel/interrupt.c:98!
> 
> Following Nick's suggestion, the diff below should help? I don't have a
> relevant box on which I can test it though.
> 
> Will
> 
> --->8
> 
> diff --git a/arch/powerpc/platforms/pseries/svm.c 
> b/arch/powerpc/platforms/pseries/svm.c
> index 1d829e257996..87f001b4c4e4 100644
> --- a/arch/powerpc/platforms/pseries/svm.c
> +++ b/arch/powerpc/platforms/pseries/svm.c
> @@ -63,6 +63,9 @@ void __init svm_swiotlb_init(void)
>  
>  int set_memory_encrypted(unsigned long addr, int numpages)
>  {
> +   if (!mem_encrypt_active())
> +   return 0;
> +
> if (!PAGE_ALIGNED(addr))
> return -EINVAL;
>  
> @@ -73,6 +76,9 @@ int set_memory_encrypted(unsigned long addr, int numpages)
>  
>  int set_memory_decrypted(unsigned long addr, int numpages)
>  {
> +   if (!mem_encrypt_active())
> +   return 0;
> +
> if (!PAGE_ALIGNED(addr))
> return -EINVAL;
>  


Re: [PATCH v15 06/12] swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing

2021-07-13 Thread Konrad Rzeszutek Wilk
..snip..
> > > I think the main question I have is how would you like to see patches for
> > > 5.15? i.e. as patches on top of devel/for-linus-5.14 or something else?
> > 
> > Yes that would be perfect. If there are any dependencies on the rc1, I
> > can rebase it on top of that.
> 
> Yes, please, rebasing would be very helpful. The broader rework of
> 'io_tlb_default_mem' is going to conflict quite badly otherwise.

There is a devel/for-linus-5.15 (based on v5.14-rc1) now.

Thank you!
> 
> Cheers,
> 
> Will


Re: [PATCH v15 06/12] swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing

2021-07-06 Thread Konrad Rzeszutek Wilk
On Tue, Jul 06, 2021 at 05:57:21PM +0100, Will Deacon wrote:
> On Tue, Jul 06, 2021 at 10:46:07AM -0400, Konrad Rzeszutek Wilk wrote:
> > On Tue, Jul 06, 2021 at 04:05:13PM +0200, Christoph Hellwig wrote:
> > > On Tue, Jul 06, 2021 at 03:01:04PM +0100, Robin Murphy wrote:
> > > > FWIW I was pondering the question of whether to do something along 
> > > > those 
> > > > lines or just scrap the default assignment entirely, so since I hadn't 
> > > > got 
> > > > round to saying that I've gone ahead and hacked up the alternative 
> > > > (similarly untested) for comparison :)
> > > >
> > > > TBH I'm still not sure which one I prefer...
> > > 
> > > Claire did implement something like your suggestion originally, but
> > > I don't really like it as it doesn't scale for adding multiple global
> > > pools, e.g. for the 64-bit addressable one for the various encrypted
> > > secure guest schemes.
> > 
> > Couple of things:
> >  - I am not pushing Claire's patchset to Linus until we have a
> >    resolution on this. I hope you all agree that is a sensible way
> >    forward, as much as I hate doing that.
> 
> Sure, it's a pity but we could clearly use a bit more time to get these
> just right and we've run out of time for 5.14.
> 
> I think the main question I have is how would you like to see patches for
> 5.15? i.e. as patches on top of devel/for-linus-5.14 or something else?

Yes that would be perfect. If there are any dependencies on the rc1, I
can rebase it on top of that.

> 
> Cheers,
> 
> Will


Re: [PATCH v15 06/12] swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing

2021-07-06 Thread Konrad Rzeszutek Wilk
On Tue, Jul 06, 2021 at 04:05:13PM +0200, Christoph Hellwig wrote:
> On Tue, Jul 06, 2021 at 03:01:04PM +0100, Robin Murphy wrote:
> > FWIW I was pondering the question of whether to do something along those 
> > lines or just scrap the default assignment entirely, so since I hadn't got 
> > round to saying that I've gone ahead and hacked up the alternative 
> > (similarly untested) for comparison :)
> >
> > TBH I'm still not sure which one I prefer...
> 
> Claire did implement something like your suggestion originally, but
> I don't really like it as it doesn't scale for adding multiple global
> pools, e.g. for the 64-bit addressable one for the various encrypted
> secure guest schemes.

Couple of things:
 - I am not pushing Claire's patchset to Linus until we have a
   resolution on this. I hope you all agree that is a sensible way
   forward, as much as I hate doing that.

 - I like Robin's fix as it is the simplest-looking one. Would love to
   see whether it does fix the problem.

 - Christoph - we can always add multiple pools as the next milestone
   and just focus on this feature getting tested extensively during this
   release.

 - Would it be worth (for the future, or maybe as another tiny fix) also
   adding a printk in swiotlb when we de-allocate the buffer, so that
   when someone looks through `dmesg` it becomes much easier to diagnose
   issues? (See the sketch right below.)
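
(A sketch of what I have in mind - untested; the message wording is
borrowed from the "tearing down default memory pool" print quoted
elsewhere in this digest:)

	void __init swiotlb_exit(void)
	{
		struct io_tlb_mem *mem = io_tlb_default_mem;

		if (!mem)
			return;
		pr_info("software IO TLB: tearing down default memory pool\n");
		/* ... existing freeing of mem->slots and the bounce buffer ... */
	}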



Re: [PATCH v14 06/12] swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing

2021-06-24 Thread Konrad Rzeszutek Wilk
On Thu, Jun 24, 2021 at 11:58:57PM +0800, Claire Chang wrote:
> On Thu, Jun 24, 2021 at 11:56 PM Konrad Rzeszutek Wilk
>  wrote:
> >
> > On Thu, Jun 24, 2021 at 10:10:51AM -0400, Qian Cai wrote:
> > >
> > >
> > > On 6/24/2021 7:48 AM, Will Deacon wrote:
> > > > Ok, diff below which attempts to tackle the offset issue I mentioned as
> > > > well. Qian Cai -- please can you try with these changes?
> > >
> > > This works fine.
> >
> > Cool. Let me squash this patch in #6 and rebase the rest of them.
> >
> > Claire, could you check the devel/for-linus-5.14 say by end of today to
> > double check that I didn't mess anything up please?
> 
> I just submitted v15 here
> (https://lore.kernel.org/patchwork/cover/1451322/) in case it's
> helpful.

Oh! Nice!
> I'll double check of course. Thanks for the efforts!

I ended up using your patch #6 and #7. Please double-check.


Re: [PATCH v15 00/12] Restricted DMA

2021-06-24 Thread Konrad Rzeszutek Wilk
On Thu, Jun 24, 2021 at 11:55:14PM +0800, Claire Chang wrote:
> This series implements mitigations for lack of DMA access control on
> systems without an IOMMU, which could result in the DMA accessing the
> system memory at unexpected times and/or unexpected addresses, possibly
> leading to data leakage or corruption.
> 
> For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is
> not behind an IOMMU. As PCI-e, by design, gives the device full access to
> system memory, a vulnerability in the Wi-Fi firmware could easily escalate
> to a full system exploit (remote wifi exploits: [1a], [1b] that shows a
> full chain of exploits; [2], [3]).
> 
> To mitigate the security concerns, we introduce restricted DMA. Restricted
> DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a
> specially allocated region and does memory allocation from the same region.
> The feature on its own provides a basic level of protection against the DMA
> overwriting buffer contents at unexpected times. However, to protect
> against general data leakage and system memory corruption, the system needs
> to provide a way to restrict the DMA to a predefined memory region (this is
> usually done at firmware level, e.g. MPU in ATF on some ARM platforms [4]).
> 
> [1a] 
> https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html
> [1b] 
> https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html
> [2] https://blade.tencent.com/en/advisories/qualpwn/
> [3] 
> https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/
> [4] 
> https://github.com/ARM-software/arm-trusted-firmware/blob/master/plat/mediatek/mt8183/drivers/emi_mpu/emi_mpu.c#L132
> 
> v15:
> - Apply Will's diff (https://lore.kernel.org/patchwork/patch/1448957/#1647521)
>   to fix the crash reported by Qian.
> - Add Stefano's Acked-by tag for patch 01/12 from v14

That should all now be on

https://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git/
devel/for-linus-5.14 (and linux-next)



Re: [PATCH v14 06/12] swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing

2021-06-24 Thread Konrad Rzeszutek Wilk
On Thu, Jun 24, 2021 at 10:10:51AM -0400, Qian Cai wrote:
> 
> 
> On 6/24/2021 7:48 AM, Will Deacon wrote:
> > Ok, diff below which attempts to tackle the offset issue I mentioned as
> > well. Qian Cai -- please can you try with these changes?
> 
> This works fine.

Cool. Let me squash this patch in #6 and rebase the rest of them.

Claire, could you check the devel/for-linus-5.14 say by end of today to
double check that I didn't mess anything up please?

Will,

Thank you for generating the fix! I am going to run it on x86 and Xen
to make sure all is good (granted, last time I ran devel/for-linus-5.14
on that setup I didn't see any errors, so I need to double-check that
I didn't do something silly like run the wrong kernel).


> 
> > 
> > Will
> > 
> > --->8
> > 
> > diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> > index 175b6c113ed8..39284ff2a6cd 100644
> > --- a/include/linux/swiotlb.h
> > +++ b/include/linux/swiotlb.h
> > @@ -116,7 +116,9 @@ static inline bool is_swiotlb_buffer(struct device 
> > *dev, phys_addr_t paddr)
> >  
> >  static inline bool is_swiotlb_force_bounce(struct device *dev)
> >  {
> > -   return dev->dma_io_tlb_mem->force_bounce;
> > +   struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> > +
> > +   return mem && mem->force_bounce;
> >  }
> >  
> >  void __init swiotlb_exit(void);
> > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> > index 44be8258e27b..0ffbaae9fba2 100644
> > --- a/kernel/dma/swiotlb.c
> > +++ b/kernel/dma/swiotlb.c
> > @@ -449,6 +449,7 @@ static int swiotlb_find_slots(struct device *dev, 
> > phys_addr_t orig_addr,
> > dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
> > unsigned int nslots = nr_slots(alloc_size), stride;
> > unsigned int index, wrap, count = 0, i;
> > +   unsigned int offset = swiotlb_align_offset(dev, orig_addr);
> > unsigned long flags;
> >  
> > BUG_ON(!nslots);
> > @@ -497,7 +498,7 @@ static int swiotlb_find_slots(struct device *dev, 
> > phys_addr_t orig_addr,
> > for (i = index; i < index + nslots; i++) {
> > mem->slots[i].list = 0;
> > mem->slots[i].alloc_size =
> > -   alloc_size - ((i - index) << IO_TLB_SHIFT);
> > +   alloc_size - (offset + ((i - index) << 
> > IO_TLB_SHIFT));
> > }
> > for (i = index - 1;
> >  io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
> > 


Re: [PATCH v14 00/12] Restricted DMA

2021-06-23 Thread Konrad Rzeszutek Wilk
On Sat, Jun 19, 2021 at 11:40:31AM +0800, Claire Chang wrote:
> This series implements mitigations for lack of DMA access control on
> systems without an IOMMU, which could result in the DMA accessing the
> system memory at unexpected times and/or unexpected addresses, possibly
> leading to data leakage or corruption.
> 
> For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is
> not behind an IOMMU. As PCI-e, by design, gives the device full access to
> system memory, a vulnerability in the Wi-Fi firmware could easily escalate
> to a full system exploit (remote wifi exploits: [1a], [1b] that shows a
> full chain of exploits; [2], [3]).
> 
> To mitigate the security concerns, we introduce restricted DMA. Restricted
> DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a
> specially allocated region and does memory allocation from the same region.
> The feature on its own provides a basic level of protection against the DMA
> overwriting buffer contents at unexpected times. However, to protect
> against general data leakage and system memory corruption, the system needs
> to provide a way to restrict the DMA to a predefined memory region (this is
> usually done at firmware level, e.g. MPU in ATF on some ARM platforms [4]).
> 
> [1a] 
> https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html
> [1b] 
> https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html
> [2] https://blade.tencent.com/en/advisories/qualpwn/
> [3] 
> https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/
> [4] 
> https://github.com/ARM-software/arm-trusted-firmware/blob/master/plat/mediatek/mt8183/drivers/emi_mpu/emi_mpu.c#L132

Heya Claire,

I put all your patches on
https://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git/log/?h=devel/for-linus-5.14

Please double-check that they all look ok.

Thank you!


Re: [PATCH v10 03/12] swiotlb: Set dev->dma_io_tlb_mem to the swiotlb pool used

2021-06-15 Thread Konrad Rzeszutek Wilk
On Tue, Jun 15, 2021 at 09:27:02PM +0800, Claire Chang wrote:
> Always have the pointer to the swiotlb pool used in struct device. This
> could help simplify the code for other pools.

Applying: swiotlb: Set dev->dma_io_tlb_mem to the swiotlb pool used
error: patch failed: kernel/dma/swiotlb.c:339
error: kernel/dma/swiotlb.c: patch does not apply
..

Would you be OK rebasing this against devel/for-linus-5.14, please?
(And please send it out with the Reviewed-by from Christoph.)

Thank you!
> 
> Signed-off-by: Claire Chang 
> ---
>  drivers/base/core.c| 4 
>  include/linux/device.h | 4 
>  kernel/dma/swiotlb.c   | 8 
>  3 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index b8a8c96dca58..eeb2d49d3aa3 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -27,6 +27,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include  /* for dma_default_coherent */
>  
> @@ -2846,6 +2847,9 @@ void device_initialize(struct device *dev)
>  defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
>   dev->dma_coherent = dma_default_coherent;
>  #endif
> +#ifdef CONFIG_SWIOTLB
> + dev->dma_io_tlb_mem = io_tlb_default_mem;
> +#endif
>  }
>  EXPORT_SYMBOL_GPL(device_initialize);
>  
> diff --git a/include/linux/device.h b/include/linux/device.h
> index 4443e12238a0..2e9a378c9100 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -432,6 +432,7 @@ struct dev_links_info {
>   * @dma_pools:   Dma pools (if dma'ble device).
>   * @dma_mem: Internal for coherent mem override.
>   * @cma_area:Contiguous memory area for dma allocations
> + * @dma_io_tlb_mem: Pointer to the swiotlb pool used.  Not for driver use.
>   * @archdata:For arch-specific additions.
>   * @of_node: Associated device tree node.
>   * @fwnode:  Associated device node supplied by platform firmware.
> @@ -540,6 +541,9 @@ struct device {
>  #ifdef CONFIG_DMA_CMA
>   struct cma *cma_area;   /* contiguous memory area for dma
>  allocations */
> +#endif
> +#ifdef CONFIG_SWIOTLB
> + struct io_tlb_mem *dma_io_tlb_mem;
>  #endif
>   /* arch specific additions */
>   struct dev_archdata archdata;
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 97c6ad50fdc2..949a6bb21343 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -339,7 +339,7 @@ void __init swiotlb_exit(void)
>  static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t 
> size,
>  enum dma_data_direction dir)
>  {
> - struct io_tlb_mem *mem = io_tlb_default_mem;
> + struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>   int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
>   phys_addr_t orig_addr = mem->slots[index].orig_addr;
>   size_t alloc_size = mem->slots[index].alloc_size;
> @@ -421,7 +421,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, 
> unsigned int index)
>  static int find_slots(struct device *dev, phys_addr_t orig_addr,
>   size_t alloc_size)
>  {
> - struct io_tlb_mem *mem = io_tlb_default_mem;
> + struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>   unsigned long boundary_mask = dma_get_seg_boundary(dev);
>   dma_addr_t tbl_dma_addr =
>   phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
> @@ -498,7 +498,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
> phys_addr_t orig_addr,
>   size_t mapping_size, size_t alloc_size,
>   enum dma_data_direction dir, unsigned long attrs)
>  {
> - struct io_tlb_mem *mem = io_tlb_default_mem;
> + struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>   unsigned int offset = swiotlb_align_offset(dev, orig_addr);
>   unsigned int i;
>   int index;
> @@ -549,7 +549,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, 
> phys_addr_t tlb_addr,
> size_t mapping_size, enum dma_data_direction dir,
> unsigned long attrs)
>  {
> - struct io_tlb_mem *mem = io_tlb_default_mem;
> + struct io_tlb_mem *mem = hwdev->dma_io_tlb_mem;
>   unsigned long flags;
>   unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
>   int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> -- 
> 2.32.0.272.g935e593368-goog
> 


Re: simplify gendisk and request_queue allocation for blk-mq based drivers

2021-06-04 Thread Konrad Rzeszutek Wilk
On Wed, Jun 02, 2021 at 09:53:15AM +0300, Christoph Hellwig wrote:
> Hi all,

Hi!

You wouldn't have a nice git repo to pull from so one can test it easily?

Thank you!

Cc-ing Boris/Juergen - please see the Xen bits below.
> 
> this series is the second part of cleaning up lifetimes and allocation of
> the gendisk and request_queue structure.  It adds a new interface to
> allocate the disk and queue together for blk based drivers, and uses that
> in all drivers that do not have any caveats in their gendisk and
> request_queue lifetime rules.
> 
> Diffstat:
>  block/blk-mq.c  |   91 +++---
>  block/blk.h |1 
>  block/elevator.c|2 
>  drivers/block/amiflop.c |   16 +-
>  drivers/block/aoe/aoeblk.c  |   33 
>  drivers/block/aoe/aoedev.c  |3 -
>  drivers/block/ataflop.c |   16 +-
>  drivers/block/floppy.c  |   20 +--
>  drivers/block/loop.c|   19 ++-
>  drivers/block/nbd.c |   53 +++
>  drivers/block/null_blk/main.c   |   11 +---
>  drivers/block/paride/pcd.c  |   19 +++
>  drivers/block/paride/pd.c   |   30 ---
>  drivers/block/paride/pf.c   |   18 ++
>  drivers/block/ps3disk.c |   36 +
>  drivers/block/rbd.c |   52 ++-
>  drivers/block/rnbd/rnbd-clt.c   |   35 +++--
>  drivers/block/sunvdc.c  |   47 -
>  drivers/block/swim.c|   34 +---
>  drivers/block/swim3.c   |   33 +---
>  drivers/block/sx8.c |   23 ++--
>  drivers/block/virtio_blk.c  |   26 ++---
>  drivers/block/xen-blkfront.c|   96 ++--
>  drivers/block/z2ram.c   |   15 +
>  drivers/cdrom/gdrom.c   |   45 +++-
>  drivers/md/dm-rq.c  |9 +--
>  drivers/memstick/core/ms_block.c|   25 +++--
>  drivers/memstick/core/mspro_block.c |   26 -
>  drivers/mtd/mtd_blkdevs.c   |   48 --
>  drivers/mtd/ubi/block.c |   68 ++---
>  drivers/s390/block/scm_blk.c|   21 ++-
>  include/linux/blk-mq.h  |   24 ++---
>  include/linux/elevator.h|1 
>  33 files changed, 386 insertions(+), 610 deletions(-)


Re: [PATCH v7 01/15] swiotlb: Refactor swiotlb init functions

2021-05-24 Thread Konrad Rzeszutek Wilk
> > do the set_memory_decrypted()+memset(). Is this okay or should
> > swiotlb_init_io_tlb_mem() add an additional argument to do this
> > conditionally?
> 
> I'm actually not sure if this it okay. If not, will add an additional
> argument for it.

Any observations discovered? (I want to make sure my memory-cache has the
correct semantics of set_memory_decrypted in mind.)
> 
> > --
> > Florian
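
(For reference, the conditional variant under discussion would presumably
look something like this; the "decrypt" argument is hypothetical:)

	static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem,
					    phys_addr_t start,
					    unsigned long nslabs, bool decrypt)
	{
		size_t bytes = nslabs << IO_TLB_SHIFT;
		void *vaddr = phys_to_virt(start);

		if (decrypt) {
			set_memory_decrypted((unsigned long)vaddr,
					     bytes >> PAGE_SHIFT);
			memset(vaddr, 0, bytes);
		}
		/* ... slot bookkeeping initialization ... */
	}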


Re: [PATCH v7 05/15] swiotlb: Add a new get_io_tlb_mem getter

2021-05-24 Thread Konrad Rzeszutek Wilk
On Tue, May 18, 2021 at 02:51:52PM +0800, Claire Chang wrote:
> Still keep this function because directly using dev->dma_io_tlb_mem
> will cause issues for memory allocation for existing devices. The pool
> can't support atomic coherent allocation so we need to distinguish the
> per device pool and the default pool in swiotlb_alloc.

The above should really be rolled into the commit message. You can prefix
it with "The reason it was done this way was because directly using .."




Re: [PATCH v7 04/15] swiotlb: Add restricted DMA pool initialization

2021-05-24 Thread Konrad Rzeszutek Wilk
On Tue, May 18, 2021 at 02:48:35PM +0800, Claire Chang wrote:
> I didn't move this to a separate file because I feel it might be
> confusing for swiotlb_alloc/free (and need more functions to be
> non-static).
> Maybe instead of moving to a separate file, we can try to come up with
> a better naming?

I think you are referring to:

rmem_swiotlb_setup

?

Which is ARM specific and inside the generic code?



Christoph wants to unify it all in one place so there is a single
source, but the "separate arch code out from generic code" principle
makes me want to move it out.

I agree that if you move it out from generic to arch-specific code we
have to expose more of the swiotlb functions, which would undo
Christoph's cleanup.

How about this - let's leave it as-is for now, and when there are more
use-cases we can revisit it and, if needed, move the code then?



Re: [PATCH 12/14] swiotlb: move global variables into a new io_tlb_mem structure

2021-03-17 Thread Konrad Rzeszutek Wilk
On Wed, Mar 17, 2021 at 06:57:42PM +0100, Christoph Hellwig wrote:
> On Wed, Mar 17, 2021 at 01:51:56PM -0400, Konrad Rzeszutek Wilk wrote:
> > On Wed, Mar 17, 2021 at 02:53:27PM +0100, Christoph Hellwig wrote:
> > > On Wed, Mar 17, 2021 at 01:42:07PM +, Konrad Rzeszutek Wilk wrote:
> > > > > - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t));
> > > > > - io_tlb_alloc_size = memblock_alloc(alloc_size, PAGE_SIZE);
> > > > > - if (!io_tlb_alloc_size)
> > > > > - panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
> > > > > -   __func__, alloc_size, PAGE_SIZE);
> > > > 
> > > > Shouldn't this be converted to:
> > > > mem->alloc_size = memblock_alloc(alloc_size, PAGE_SIZE);
> > > > if (...)
> > > > 
> > > > Seems that it got lost in the search and replace?
> > > 
> > > Yes, I messed that up during the rebase.  That being said it magically
> > > gets fixed in the next patch..
> > 
> > Yes. However if someone does a bisection they are going to be mighty unhappy
> > with you.
> 
> Sure, I was planning on fixing it anyway.  Just waiting for feedback
> on the rest of the patches before doing a respin.

I put the patches up to this one on:

 git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb 
devel/for-linus-5.13

Would you be OK rebasing on top of that and sending the patches?

Juergen, would you be OK testing that branch on your Xen setup?


Re: [PATCH 12/14] swiotlb: move global variables into a new io_tlb_mem structure

2021-03-17 Thread Konrad Rzeszutek Wilk
On Wed, Mar 17, 2021 at 02:53:27PM +0100, Christoph Hellwig wrote:
> On Wed, Mar 17, 2021 at 01:42:07PM +0000, Konrad Rzeszutek Wilk wrote:
> > > - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t));
> > > - io_tlb_alloc_size = memblock_alloc(alloc_size, PAGE_SIZE);
> > > - if (!io_tlb_alloc_size)
> > > - panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
> > > -   __func__, alloc_size, PAGE_SIZE);
> > 
> > Shouldn't this be converted to:
> > mem->alloc_size = memblock_alloc(alloc_size, PAGE_SIZE);
> > if (...)
> > 
> > Seems that it got lost in the search and replace?
> 
> Yes, I messed that up during the rebase.  That being said it magically
> gets fixed in the next patch..

Yes. However if someone does a bisection they are going to be mighty unhappy
with you.


Re: [PATCH 12/14] swiotlb: move global variables into a new io_tlb_mem structure

2021-03-17 Thread Konrad Rzeszutek Wilk
..snip..
>  int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int 
> verbose)
>  {
..snip..
>   /*
>* Allocate and initialize the free list array.  This array is used
>* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
> -  * between io_tlb_start and io_tlb_end.
> +  * between mem->start and mem->end.
>*/
> - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int));
> - io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE);
> - if (!io_tlb_list)
> + alloc_size = PAGE_ALIGN(mem->nslabs * sizeof(int));
> + mem->list = memblock_alloc(alloc_size, PAGE_SIZE);
> + if (!mem->list)
>   panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
> __func__, alloc_size, PAGE_SIZE);
>  
> - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t));
> - io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
> - if (!io_tlb_orig_addr)
> + alloc_size = PAGE_ALIGN(mem->nslabs * sizeof(phys_addr_t));
> + mem->orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
> + if (!mem->orig_addr)
>   panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
> __func__, alloc_size, PAGE_SIZE);
>  
> - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t));
> - io_tlb_alloc_size = memblock_alloc(alloc_size, PAGE_SIZE);
> - if (!io_tlb_alloc_size)
> - panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
> -   __func__, alloc_size, PAGE_SIZE);

Shouldn't this be converted to:
mem->alloc_size = memblock_alloc(alloc_size, PAGE_SIZE);
if (...)

Seems that it got lost in the search and replace?
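
I.e., presumably something along these lines, mirroring the mem->list and
mem->orig_addr conversions above:

	alloc_size = PAGE_ALIGN(mem->nslabs * sizeof(size_t));
	mem->alloc_size = memblock_alloc(alloc_size, PAGE_SIZE);
	if (!mem->alloc_size)
		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
		      __func__, alloc_size, PAGE_SIZE);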
> -
> - for (i = 0; i < io_tlb_nslabs; i++) {
> - io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
> - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
> - io_tlb_alloc_size[i] = 0;
> + for (i = 0; i < mem->nslabs; i++) {
> + mem->list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
> + mem->orig_addr[i] = INVALID_PHYS_ADDR;
> + mem->alloc_size[i] = 0;
>   }
> - io_tlb_index = 0;
>   no_iotlb_memory = false;
>  
>   if (verbose)
>   swiotlb_print_info();
>  
> - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
> + swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
>   return 0;
>  }
>  

..snip..


Re: [PATCH 03/14] swiotlb: move orig addr and size validation into swiotlb_bounce

2021-03-16 Thread Konrad Rzeszutek Wilk
On Mon, Mar 01, 2021 at 08:44:25AM +0100, Christoph Hellwig wrote:
> Move the code to find and validate the original buffer address and size
> from the callers into swiotlb_bounce.  This means a tiny bit of extra
> work in the swiotlb_map path, but avoids code duplication and a leads to
> a better code structure.


Reviewed-by: Konrad Rzeszutek Wilk 


Re: [PATCH 02/14] swiotlb: remove the alloc_size parameter to swiotlb_tbl_unmap_single

2021-03-16 Thread Konrad Rzeszutek Wilk
On Mon, Mar 01, 2021 at 08:44:24AM +0100, Christoph Hellwig wrote:
> Now that swiotlb remembers the allocation size there is no need to pass
> it back to swiotlb_tbl_unmap_single.
Reviewed-by: Konrad Rzeszutek Wilk 



Re: [PATCH RFC v1 5/6] xen-swiotlb: convert variables to arrays

2021-02-19 Thread Konrad Rzeszutek Wilk
On Sun, Feb 07, 2021 at 04:56:01PM +0100, Christoph Hellwig wrote:
> On Thu, Feb 04, 2021 at 09:40:23AM +0100, Christoph Hellwig wrote:
> > So one thing that has been on my mind for a while:  I'd really like
> > to kill the separate dma ops in Xen swiotlb.  If we compare xen-swiotlb
> > to swiotlb the main difference seems to be:
> > 
> >  - additional reasons to bounce I/O vs the plain DMA capable
> >  - the possibility to do a hypercall on arm/arm64
> >  - an extra translation layer before doing the phys_to_dma and vice
> >versa
> >  - an special memory allocator
> > 
> > I wonder if inbetween a few jump labels or other no overhead enablement
> > options and possibly better use of the dma_range_map we could kill
> > off most of swiotlb-xen instead of maintaining all this code duplication?
> 
> So I looked at this a bit more.
> 
> For x86 with XENFEAT_auto_translated_physmap (how common is that?)

Juergen, Boris, please correct me if I am wrong, but
XENFEAT_auto_translated_physmap only works for PVH guests?

> pfn_to_gfn is a nop, so plain phys_to_dma/dma_to_phys do work as-is.
> 
> xen_arch_need_swiotlb always returns true for x86, and
> range_straddles_page_boundary should never be true for the
> XENFEAT_auto_translated_physmap case.

Correct. The kernel should have no clue of what the real MFNs are
for PFNs.
> 
> So as far as I can tell the mapping fast path for the
> XENFEAT_auto_translated_physmap can be trivially reused from swiotlb.
> 
> That leaves us with the next more complicated case, x86 or fully cache
> coherent arm{,64} without XENFEAT_auto_translated_physmap.  In that case
> we need to patch in a phys_to_dma/dma_to_phys that performs the MFN
> lookup, which could be done using alternatives or jump labels.
> I think if that is done right we should also be able to let that cover
> the foreign pages in is_xen_swiotlb_buffer/is_swiotlb_buffer, but
> in that worst case that would need another alternative / jump label.
> 
> For non-coherent arm{,64} we'd also need to use alternatives or jump
> labels to for the cache maintainance ops, but that isn't a hard problem
> either.
> 
> 
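
(Purely for illustration, one possible shape of the jump-label idea
described above - every name here is invented:)

	/* Hypothetical: flip the key on Xen PV so the DMA address
	 * translation does the extra MFN lookup. */
	DEFINE_STATIC_KEY_FALSE(xen_pv_dma_translate);

	static inline dma_addr_t example_phys_to_dma(struct device *dev,
						     phys_addr_t paddr)
	{
		if (static_branch_unlikely(&xen_pv_dma_translate))
			paddr = example_pfn_to_mfn_addr(paddr); /* hypothetical */
		return paddr;	/* plus the usual dma_range_map offset */
	}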


Re: [PATCH 3/8] xen-swiotlb: use io_tlb_end in xen_swiotlb_dma_supported

2021-02-19 Thread Konrad Rzeszutek Wilk
On Sun, Feb 07, 2021 at 05:09:29PM +0100, Christoph Hellwig wrote:
> Use the existing variable that holds the physical address for
> xen_io_tlb_end to simplify xen_swiotlb_dma_supported a bit, and remove
> the otherwise unused xen_io_tlb_end variable and the xen_virt_to_bus
> helper.
> 
Reviewed-by: Konrad Rzeszutek Wilk 

> Signed-off-by: Christoph Hellwig 
> ---
>  drivers/xen/swiotlb-xen.c | 10 ++
>  1 file changed, 2 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index a4026822a889f7..4298f74a083985 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -46,7 +46,7 @@
>   * API.
>   */
>  
> -static char *xen_io_tlb_start, *xen_io_tlb_end;
> +static char *xen_io_tlb_start;
>  static unsigned long xen_io_tlb_nslabs;
>  /*
>   * Quick lookup value of the bus address of the IOTLB.
> @@ -82,11 +82,6 @@ static inline phys_addr_t xen_dma_to_phys(struct device 
> *dev,
>   return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
>  }
>  
> -static inline dma_addr_t xen_virt_to_bus(struct device *dev, void *address)
> -{
> - return xen_phys_to_dma(dev, virt_to_phys(address));
> -}
> -
>  static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
>  {
>   unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p);
> @@ -250,7 +245,6 @@ int __ref xen_swiotlb_init(int verbose, bool early)
>   rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, 
> xen_io_tlb_nslabs);
>  
>  end:
> - xen_io_tlb_end = xen_io_tlb_start + bytes;
>   if (!rc)
>   swiotlb_set_max_segment(PAGE_SIZE);
>  
> @@ -558,7 +552,7 @@ xen_swiotlb_sync_sg_for_device(struct device *dev, struct 
> scatterlist *sgl,
>  static int
>  xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
>  {
> - return xen_virt_to_bus(hwdev, xen_io_tlb_end - 1) <= mask;
> + return xen_phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
>  }
>  
>  const struct dma_map_ops xen_swiotlb_dma_ops = {
> -- 
> 2.29.2
> 


Re: [PATCH 2/8] xen-swiotlb: use is_swiotlb_buffer in is_xen_swiotlb_buffer

2021-02-19 Thread Konrad Rzeszutek Wilk
On Sun, Feb 07, 2021 at 05:09:28PM +0100, Christoph Hellwig wrote:
> Use the is_swiotlb_buffer to check if a physical address is
> a swiotlb buffer.  This works because xen-swiotlb does use the
> same buffer as the main swiotlb code, and xen_io_tlb_{start,end}
> are just the addresses for it that went through phys_to_virt.
> 

Reviewed-by: Konrad Rzeszutek Wilk 

> Signed-off-by: Christoph Hellwig 
> ---
>  drivers/xen/swiotlb-xen.c | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index 2b385c1b4a99cb..a4026822a889f7 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -111,10 +111,8 @@ static int is_xen_swiotlb_buffer(struct device *dev, 
> dma_addr_t dma_addr)
>* have the same virtual address as another address
>* in our domain. Therefore _only_ check address within our domain.
>*/
> - if (pfn_valid(PFN_DOWN(paddr))) {
> - return paddr >= virt_to_phys(xen_io_tlb_start) &&
> -paddr < virt_to_phys(xen_io_tlb_end);
> - }
> + if (pfn_valid(PFN_DOWN(paddr)))
> + return is_swiotlb_buffer(paddr);
>   return 0;
>  }
>  
> -- 
> 2.29.2
> 


Re: [PATCH RFC v1 2/6] swiotlb: convert variables to arrays

2021-02-04 Thread Konrad Rzeszutek Wilk
On Thu, Feb 04, 2021 at 11:49:23AM +, Robin Murphy wrote:
> On 2021-02-04 07:29, Christoph Hellwig wrote:
> > On Wed, Feb 03, 2021 at 03:37:05PM -0800, Dongli Zhang wrote:
> > > This patch converts several swiotlb related variables to arrays, in
> > > order to maintain stat/status for different swiotlb buffers. Here are
> > > variables involved:
> > > 
> > > - io_tlb_start and io_tlb_end
> > > - io_tlb_nslabs and io_tlb_used
> > > - io_tlb_list
> > > - io_tlb_index
> > > - max_segment
> > > - io_tlb_orig_addr
> > > - no_iotlb_memory
> > > 
> > > There is no functional change and this is to prepare to enable 64-bit
> > > swiotlb.
> > 
> > Claire Chang (on Cc) already posted a patch like this a month ago,
> > which looks much better because it actually uses a struct instead
> > of all the random variables.
> 
> Indeed, I skimmed the cover letter and immediately thought that this whole
> thing is just the restricted DMA pool concept[1] again, only from a slightly
> different angle.


Kind of. Let me lay out how some of these pieces fit together right now:

+----------------------+  +----------------------+
|                      |  |                      |
|    a) Xen-SWIOTLB    |  | b) SWIOTLB (for !Xen)|
|                      |  |                      |
+----------+-----------+  +-----------+----------+
           \                          /
            \                        /
             +---------+------------+
                       |
             +---------v-----------+
             |                     |
             |  c) SWIOTLB generic |
             |                     |
             +---------------------+

Dongli's patches modify the SWIOTLB generic c), and Xen-SWIOTLB a)
parts.

Also see the IOMMU_INIT logic, which lays this out in a bit more depth
(for example how to enable SWIOTLB on AMD boxes, or on IBM boxes with the
Calgary IOMMU, etc. - see iommu_table.h).

Furthermore, it lays the groundwork to allocate AMD SEV SWIOTLB buffers
later, after boot (so that you can stitch different pools together).
All the bits are kind of inside the SWIOTLB code, and it also changes
Xen-SWIOTLB to do something similar.

The mempool approach did it similarly, by taking the internal parts of
SWIOTLB (aka the various io_tlb variables), exposing them, and having
other code use them:

+----------------------+  +----------------------+
|                      |  |                      |
|    a) Xen-SWIOTLB    |  | b) SWIOTLB (for !Xen)|
|                      |  |                      |
+----------+-----------+  +-----------+----------+
           \                          /
            \                        /
             +---------+------------+
                       |
             +---------v-----------+  +------------------+
             |                     |  | Device tree      |
             |                     +<-+ enabling SWIOTLB |
             |  c) SWIOTLB generic |  |                  |
             |                     |  | mempool          |
             +---------------------+  +------------------+

What I was suggesting to Claire was to follow the Xen model, that is,
do something like this:

+----------------------+  +----------------------+  +----------------+
|                      |  |                      |  |                |
|    a) Xen-SWIOTLB    |  | b) SWIOTLB (for !Xen)|  | e) DT-SWIOTLB  |
|                      |  |                      |  |                |
+----------+-----------+  +-----------+----------+  +-------+--------+
           \                          |                     /
            \                         |                    /
             +---------+-------------+-------------------+
                       |
             +---------v-----------+
             |                     |
             |  c) SWIOTLB generic |
             |                     |
             +---------------------+


so: using the SWIOTLB generic parts, and then bolting the device-tree
logic on top, along with the mempool logic.



But Christoph has an interesting suggestion, which is to squash all
the existing code (a, b, c) together and pepper it with various
jump tables.


So:


+----------------------------+
| SWIOTLB:                   |
|                            |
|  a) SWIOTLB (for non-Xen)  |
|  b) Xen-SWIOTLB            |
|  c) DT-SWIOTLB             |
|                            |
+----------------------------+


with all the various bits (M2P/P2M for Xen, mempool for ARM,
and normal allocation for BM) in one big file.



Re: [PATCH] powerpc/mm: Limit allocation of SWIOTLB on server machines

2021-01-21 Thread Konrad Rzeszutek Wilk
On Fri, Jan 08, 2021 at 09:27:01PM -0300, Thiago Jung Bauermann wrote:
> 
> Ram Pai  writes:
> 
> > On Wed, Dec 23, 2020 at 09:06:01PM -0300, Thiago Jung Bauermann wrote:
> >> 
> >> Hi Ram,
> >> 
> >> Thanks for reviewing this patch.
> >> 
> >> Ram Pai  writes:
> >> 
> >> > On Fri, Dec 18, 2020 at 03:21:03AM -0300, Thiago Jung Bauermann wrote:
> >> >> On server-class POWER machines, we don't need the SWIOTLB unless we're a
> >> >> secure VM. Nevertheless, if CONFIG_SWIOTLB is enabled we unconditionally
> >> >> allocate it.
> >> >> 
> >> >> In most cases this is harmless, but on a few machine configurations 
> >> >> (e.g.,
> >> >> POWER9 powernv systems with 4 GB area reserved for crashdump kernel) it 
> >> >> can
> >> >> happen that memblock can't find a 64 MB chunk of memory for the SWIOTLB 
> >> >> and
> >> >> fails with a scary-looking WARN_ONCE:
> >> >> 
> >> >>  [ cut here ]
> >> >>  memblock: bottom-up allocation failed, memory hotremove may be affected
> >> >>  WARNING: CPU: 0 PID: 0 at mm/memblock.c:332 
> >> >> memblock_find_in_range_node+0x328/0x340
> >> >>  Modules linked in:
> >> >>  CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-rc2-orig+ #6
> >> >>  NIP:  c0442f38 LR: c0442f34 CTR: c01e0080
> >> >>  REGS: c1def900 TRAP: 0700   Not tainted  (5.10.0-rc2-orig+)
> >> >>  MSR:  92021033   CR: 2802  XER: 
> >> >> 2004
> >> >>  CFAR: c014b7b4 IRQMASK: 1
> >> >>  GPR00: c0442f34 c1defba0 c1deff00 
> >> >> 0047
> >> >>  GPR04: 7fff c1def828 c1def820 
> >> >> 
> >> >>  GPR08: 001ffc3e c1b75478 c1b75478 
> >> >> 0001
> >> >>  GPR12: 2000 c203  
> >> >> 
> >> >>  GPR16:    
> >> >> 0203
> >> >>  GPR20:  0001 0001 
> >> >> c1defc10
> >> >>  GPR24: c1defc08 c1c91868 c1defc18 
> >> >> c1c91890
> >> >>  GPR28:   0400 
> >> >> 
> >> >>  NIP [c0442f38] memblock_find_in_range_node+0x328/0x340
> >> >>  LR [c0442f34] memblock_find_in_range_node+0x324/0x340
> >> >>  Call Trace:
> >> >>  [c1defba0] [c0442f34] 
> >> >> memblock_find_in_range_node+0x324/0x340 (unreliable)
> >> >>  [c1defc90] [c15ac088] 
> >> >> memblock_alloc_range_nid+0xec/0x1b0
> >> >>  [c1defd40] [c15ac1f8] 
> >> >> memblock_alloc_internal+0xac/0x110
> >> >>  [c1defda0] [c15ac4d0] memblock_alloc_try_nid+0x94/0xcc
> >> >>  [c1defe30] [c159c3c8] swiotlb_init+0x78/0x104
> >> >>  [c1defea0] [c158378c] mem_init+0x4c/0x98
> >> >>  [c1defec0] [c157457c] start_kernel+0x714/0xac8
> >> >>  [c1deff90] [c000d244] start_here_common+0x1c/0x58
> >> >>  Instruction dump:
> >> >>  2c23 4182ffd4 ea610088 ea810090 4bfffe84 3921 3d42fff4 3c62ff60
> >> >>  3863c560 992a8bfc 4bd0881d 6000 <0fe0> ea610088 4bfffd94 
> >> >> 6000
> >> >>  random: get_random_bytes called from __warn+0x128/0x184 with 
> >> >> crng_init=0
> >> >>  ---[ end trace  ]---
> >> >>  software IO TLB: Cannot allocate buffer
> >> >> 
> >> >> Unless this is a secure VM the message can actually be ignored, because 
> >> >> the
> >> >> SWIOTLB isn't needed. Therefore, let's avoid the SWIOTLB in those cases.
> >> >
> >> > The above warn_on is conveying a genuine warning. Should it be silenced?
> >> 
> >> Not sure I understand your point. This patch doesn't silence the
> >> warning, it avoids the problem it is warning about.
> >
> > Sorry, I should have explained it better. My point is...  
> >
> > If CONFIG_SWIOTLB is enabled, it means that the kernel is
> > promising the bounce buffering capability. I know, currently we
> > do not have any kernel subsystems that use bounce buffers on
> > non-secure-pseries-kernel or powernv-kernel.  But that does not
> > mean, there wont be any. In case there is such a third-party
> > module needing bounce buffering, it wont be able to operate,
> > because of the proposed change in your patch.
> >
> > Is that a good thing or a bad thing, I do not know. I will let
> > the experts opine.
> 
> Ping? Does anyone else has an opinion on this? The other option I can
> think of is changing the crashkernel code to not reserve so much memory
> below 4 GB. Other people are considering this option, but it's not
> planned for the near future.

That seems a more suitable solution regardless, but there is always
the danger of the reservation not being big enough, or being too big.

There were some automatic crashkernel allocation patches going around
for x86 and ARM that could perhaps be re-used?

> 
> Also, there's a patch currently in linux-next which 

Re: [RFC PATCH v3 5/6] dt-bindings: of: Add restricted DMA pool

2021-01-07 Thread Konrad Rzeszutek Wilk
On Fri, Jan 08, 2021 at 01:39:43AM +0800, Claire Chang wrote:
> On Thu, Jan 7, 2021 at 2:58 AM Konrad Rzeszutek Wilk
>  wrote:
> >
> > On Wed, Jan 06, 2021 at 11:41:23AM +0800, Claire Chang wrote:
> > > Introduce the new compatible string, restricted-dma-pool, for restricted
> > > DMA. One can specify the address and length of the restricted DMA memory
> > > region by restricted-dma-pool in the device tree.
> > >
> > > Signed-off-by: Claire Chang 
> > > ---
> > >  .../reserved-memory/reserved-memory.txt   | 24 +++
> > >  1 file changed, 24 insertions(+)
> > >
> > > diff --git 
> > > a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt 
> > > b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> > > index e8d3096d922c..44975e2a1fd2 100644
> > > --- 
> > > a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> > > +++ 
> > > b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> > > @@ -51,6 +51,20 @@ compatible (optional) - standard definition
> > >used as a shared pool of DMA buffers for a set of devices. It 
> > > can
> > >be used by an operating system to instantiate the necessary 
> > > pool
> > >management subsystem if necessary.
> > > +- restricted-dma-pool: This indicates a region of memory meant 
> > > to be
> > > +  used as a pool of restricted DMA buffers for a set of devices. 
> > > The
> > > +  memory region would be the only region accessible to those 
> > > devices.
> > > +  When using this, the no-map and reusable properties must not 
> > > be set,
> > > +  so the operating system can create a virtual mapping that will 
> > > be used
> > > +  for synchronization. The main purpose for restricted DMA is to
> > > +  mitigate the lack of DMA access control on systems without an 
> > > IOMMU,
> > > +  which could result in the DMA accessing the system memory at
> > > +  unexpected times and/or unexpected addresses, possibly leading 
> > > to data
> > > +  leakage or corruption. The feature on its own provides a basic 
> > > level
> > > +  of protection against the DMA overwriting buffer contents at
> > > +  unexpected times. However, to protect against general data 
> > > leakage and
> > > +  system memory corruption, the system needs to provide way to 
> > > restrict
> > > +  the DMA to a predefined memory region.
> >
> > Heya!
> >
> > I think I am missing something obvious here so please bear with my
> > questions:
> >
> >  - This code adds the means of having the SWIOTLB pool tied to a specific
> >    memory region, correct?
> 
> It doesn't affect the existing SWIOTLB. It just utilizes the existing SWIOTLB
> code to create another DMA pool tied to a specific memory region for a given 
> set
> of devices. It bounces the streaming DMA (map/unmap) in and out of that region
> and does the memory allocation (dma_direct_alloc) from the same region.

Right, so why can't it follow the same mechanism that Xen-SWIOTLB does -
which had exactly the same problem (needing special handling for the
pool) - and use similar code?

> 
> >
> >
> >  - Nothing stops the physical device from bypassing the SWIOTLB buffer.
> >    That is, if an errant device screwed up the length or DMA address, the
> >    SWIOTLB would gladly do what the device told it to do?
> 
> So the system needs to provide a way to lock down the memory access, e.g. MPU.

OK! Would it be prudent to have this in the description above perhaps?
> 
> >
> >  - This has to be combined with SWIOTLB-force-ish to always use the
> >bounce buffer, otherwise you could still do DMA without using
> >SWIOTLB (by not hitting the criteria for needing to use SWIOTLB)?
> 
> Since restricted DMA is for the devices that are not behind an IOMMU, I change
> the criteria
> `if (unlikely(swiotlb_force == SWIOTLB_FORCE))`
> to
> `if (unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dma_io_tlb_mem)`
> in dma_direct_map_page().
> 
> Also, even if SWIOTLB=force, the restricted DMA pool is preferred if available
> (get_io_tlb_mem in https://lore.kernel.org/patchwork/patch/1360995/).
> 
> Thanks!
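
(A sketch of the changed check described above, in dma_direct_map_page -
surrounding code abridged:)

	static inline dma_addr_t dma_direct_map_page(struct device *dev,
			struct page *page, unsigned long offset, size_t size,
			enum dma_data_direction dir, unsigned long attrs)
	{
		phys_addr_t phys = page_to_phys(page) + offset;

		/* Restricted-DMA devices have dev->dma_io_tlb_mem set, so
		 * they always take the bounce-buffer path: */
		if (unlikely(swiotlb_force == SWIOTLB_FORCE) ||
		    dev->dma_io_tlb_mem)
			return swiotlb_map(dev, phys, size, dir, attrs);
		/* ... regular direct-mapping path ... */
	}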


Re: [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool

2021-01-07 Thread Konrad Rzeszutek Wilk
On Thu, Jan 07, 2021 at 10:09:14AM -0800, Florian Fainelli wrote:
> On 1/7/21 9:57 AM, Konrad Rzeszutek Wilk wrote:
> > On Fri, Jan 08, 2021 at 01:39:18AM +0800, Claire Chang wrote:
> >> Hi Greg and Konrad,
> >>
> >> This change is intended to be non-arch specific. Any arch that lacks DMA 
> >> access
> >> control and has devices not behind an IOMMU can make use of it. Could you 
> >> share
> >> why you think this should be arch specific?
> > 
> > The idea behind non-arch-specific code is for it to be generic. The
> > devicetree is specific to PowerPC, SPARC, and ARM, and not to x86 -
> > hence this should be in arch-specific code.
> 
> In principle the same code could be used with an ACPI-enabled system with
> an appropriate service to identify the restricted DMA regions and unlock
> them.

Which this patchset is not.

> 
> More than 1 architecture requiring this function (ARM and ARM64 are the
> two I can think of needing this immediately) sort of calls for making
> the code architecture agnostic since past 2, you need something that scales.

I believe the use-case is for ARM64 at this moment.

> 
> There is already code today under kernel/dma/contiguous.c that is only
> activated on a CONFIG_OF=y && CONFIG_OF_RESERVED_MEM=y system, this is
> no different.
> -- 
> Florian


Re: [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool

2021-01-07 Thread Konrad Rzeszutek Wilk
On Fri, Jan 08, 2021 at 01:39:18AM +0800, Claire Chang wrote:
> Hi Greg and Konrad,
> 
> This change is intended to be non-arch specific. Any arch that lacks DMA 
> access
> control and has devices not behind an IOMMU can make use of it. Could you 
> share
> why you think this should be arch specific?

The idea behind non-arch-specific code is for it to be generic. The
devicetree is specific to PowerPC, SPARC, and ARM, and not to x86 -
hence this should be in arch-specific code.

> 
> Thanks!


Re: [RFC PATCH v3 5/6] dt-bindings: of: Add restricted DMA pool

2021-01-06 Thread Konrad Rzeszutek Wilk
On Wed, Jan 06, 2021 at 11:41:23AM +0800, Claire Chang wrote:
> Introduce the new compatible string, restricted-dma-pool, for restricted
> DMA. One can specify the address and length of the restricted DMA memory
> region by restricted-dma-pool in the device tree.
> 
> Signed-off-by: Claire Chang 
> ---
>  .../reserved-memory/reserved-memory.txt   | 24 +++
>  1 file changed, 24 insertions(+)
> 
> diff --git 
> a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt 
> b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> index e8d3096d922c..44975e2a1fd2 100644
> --- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> +++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> @@ -51,6 +51,20 @@ compatible (optional) - standard definition
>used as a shared pool of DMA buffers for a set of devices. It can
>be used by an operating system to instantiate the necessary pool
>management subsystem if necessary.
> +- restricted-dma-pool: This indicates a region of memory meant to be
> +  used as a pool of restricted DMA buffers for a set of devices. The
> +  memory region would be the only region accessible to those devices.
> +  When using this, the no-map and reusable properties must not be 
> set,
> +  so the operating system can create a virtual mapping that will be 
> used
> +  for synchronization. The main purpose for restricted DMA is to
> +  mitigate the lack of DMA access control on systems without an 
> IOMMU,
> +  which could result in the DMA accessing the system memory at
> +  unexpected times and/or unexpected addresses, possibly leading to 
> data
> +  leakage or corruption. The feature on its own provides a basic 
> level
> +  of protection against the DMA overwriting buffer contents at
> +  unexpected times. However, to protect against general data leakage 
> and
> +  system memory corruption, the system needs to provide way to 
> restrict
> +  the DMA to a predefined memory region.

Heya!

I think I am missing something obvious here so please bear with my
questions:

 - This code adds the means of having the SWIOTLB pool tied to a specific
   memory region, correct?

 - Nothing stops the physical device from bypassing the SWIOTLB buffer.
   That is, if an errant device screwed up the length or DMA address, the
   SWIOTLB would gladly do what the device told it to do?

 - This has to be combined with SWIOTLB-force-ish to always use the
   bounce buffer, otherwise you could still do DMA without using
   SWIOTLB (by not hitting the criteria for needing to use SWIOTLB)?


Re: [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool

2021-01-06 Thread Konrad Rzeszutek Wilk
Hello!

In this file:

> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index e4368159f88a..7fb2ac087d23 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
..

> +static const struct reserved_mem_ops rmem_swiotlb_ops = {
> + .device_init= rmem_swiotlb_device_init,
> + .device_release = rmem_swiotlb_device_release,
> +};
> +
> +static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
> +{
> + unsigned long node = rmem->fdt_node;
> +
> + if (of_get_flat_dt_prop(node, "reusable", NULL) ||
> + of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
> + of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
> + of_get_flat_dt_prop(node, "no-map", NULL))
> + return -EINVAL;
> +
> + rmem->ops = &rmem_swiotlb_ops;
> + pr_info("Reserved memory: created device swiotlb memory pool at %pa, size %ld MiB\n",
> + &rmem->base, (unsigned long)rmem->size / SZ_1M);
> + return 0;
> +}
> +
> +RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);

The code should be as arch-agnostic as possible. That is why there
are multiple *-swiotlb files scattered in the arch directories that own the
architecture-specific code.

Would it be possible to move the code there and perhaps have an
ARM-specific front-end for this restricted DMA pool there? See for example
the xen-swiotlb code.

Cheers!

Konrad
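
To illustrate the front-end idea: the arch hook could be as thin as a
reserved-memory handler that forwards to a common swiotlb helper. A rough
sketch - the file placement and the swiotlb_rmem_setup_generic() helper are
assumptions, not part of the posted series:

/* e.g. arch/arm64/mm/dma-restricted.c */
#include <linux/of_reserved_mem.h>
#include <linux/swiotlb.h>

static int __init arm64_rmem_swiotlb_setup(struct reserved_mem *rmem)
{
	/* any ARM-specific sanity checking would go here ... */
	return swiotlb_rmem_setup_generic(rmem);	/* hypothetical core helper */
}
RESERVEDMEM_OF_DECLARE(arm64_dma, "restricted-dma-pool",
		       arm64_rmem_swiotlb_setup);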


Re: [PATCH v3] powerpc/pseries/svm: Allocate SWIOTLB buffer anywhere in memory

2020-08-19 Thread Konrad Rzeszutek Wilk
On Tue, Aug 18, 2020 at 07:11:26PM -0300, Thiago Jung Bauermann wrote:
> POWER secure guests (i.e., guests which use the Protection Execution
> Facility) need to use SWIOTLB to be able to do I/O with the hypervisor, but
> they don't need the SWIOTLB memory to be in low addresses since the
> hypervisor doesn't have any addressing limitation.
> 
> This solves a SWIOTLB initialization problem we are seeing in secure guests
> with 128 GB of RAM: they are configured with 4 GB of crashkernel reserved
> memory, which leaves no space for SWIOTLB in low addresses.
> 
> To do this, we use mostly the same code as swiotlb_init(), but allocate the
> buffer using memblock_alloc() instead of memblock_alloc_low().
> 
> Signed-off-by: Thiago Jung Bauermann 

Reviewed-by: Konrad Rzeszutek Wilk 


> ---
>  arch/powerpc/include/asm/svm.h   |  4 
>  arch/powerpc/mm/mem.c|  6 +-
>  arch/powerpc/platforms/pseries/svm.c | 26 ++
>  3 files changed, 35 insertions(+), 1 deletion(-)
> 
> Changes from v2:
> - Panic if unable to allocate buffer, as suggested by Christoph.
> 
> Changes from v1:
> - Open-code swiotlb_init() in arch-specific code, as suggested by
>   Christoph.
> 
> diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h
> index 85580b30aba4..7546402d796a 100644
> --- a/arch/powerpc/include/asm/svm.h
> +++ b/arch/powerpc/include/asm/svm.h
> @@ -15,6 +15,8 @@ static inline bool is_secure_guest(void)
>   return mfmsr() & MSR_S;
>  }
>  
> +void __init svm_swiotlb_init(void);
> +
>  void dtl_cache_ctor(void *addr);
>  #define get_dtl_cache_ctor() (is_secure_guest() ? dtl_cache_ctor : NULL)
>  
> @@ -25,6 +27,8 @@ static inline bool is_secure_guest(void)
>   return false;
>  }
>  
> +static inline void svm_swiotlb_init(void) {}
> +
>  #define get_dtl_cache_ctor() NULL
>  
>  #endif /* CONFIG_PPC_SVM */
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index c2c11eb8dcfc..0f21bcb16405 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -50,6 +50,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  
> @@ -290,7 +291,10 @@ void __init mem_init(void)
>* back to to-down.
>*/
>   memblock_set_bottom_up(true);
> - swiotlb_init(0);
> + if (is_secure_guest())
> + svm_swiotlb_init();
> + else
> + swiotlb_init(0);
>  #endif
>  
>   high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
> diff --git a/arch/powerpc/platforms/pseries/svm.c 
> b/arch/powerpc/platforms/pseries/svm.c
> index 40c0637203d5..81085eb8f225 100644
> --- a/arch/powerpc/platforms/pseries/svm.c
> +++ b/arch/powerpc/platforms/pseries/svm.c
> @@ -7,6 +7,7 @@
>   */
>  
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -34,6 +35,31 @@ static int __init init_svm(void)
>  }
>  machine_early_initcall(pseries, init_svm);
>  
> +/*
> + * Initialize SWIOTLB. Essentially the same as swiotlb_init(), except that it
> + * can allocate the buffer anywhere in memory. Since the hypervisor doesn't 
> have
> + * any addressing limitation, we don't need to allocate it in low addresses.
> + */
> +void __init svm_swiotlb_init(void)
> +{
> + unsigned char *vstart;
> + unsigned long bytes, io_tlb_nslabs;
> +
> + io_tlb_nslabs = (swiotlb_size_or_default() >> IO_TLB_SHIFT);
> + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
> +
> + bytes = io_tlb_nslabs << IO_TLB_SHIFT;
> +
> + vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE);
> + if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
> + return;
> +
> + if (io_tlb_start)
> + memblock_free_early(io_tlb_start,
> + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
> + panic("SVM: Cannot allocate SWIOTLB buffer");
> +}
> +
>  int set_memory_encrypted(unsigned long addr, int numpages)
>  {
>   if (!PAGE_ALIGNED(addr))


Re: [RFC PATCH 11/11] powerpc/svm: Increase SWIOTLB buffer size

2018-08-27 Thread Konrad Rzeszutek Wilk
On Fri, Aug 24, 2018 at 01:25:35PM -0300, Thiago Jung Bauermann wrote:
> From: Anshuman Khandual 
> 
> SWIOTLB buffer default size (64MB) is not enough for large sequential write
> operations which eventually leads to kernel crash like here.
> 
> virtio-pci :00:05.0: swiotlb buffer is full (sz: 327680 bytes)
> virtio-pci :00:05.0: DMA: Out of SW-IOMMU space for 327680 bytes
> Kernel panic - not syncing: DMA: Random memory could be DMA read
> CPU: 12 PID: 3985 Comm: mkfs.ext4 Not tainted 4.18.0-rc4+ #285
> Call Trace:
> [c007d2a27020] [c0cfdffc] dump_stack+0xb0/0xf4 (unreliable)
> [c007d2a27060] [c0112a98] panic+0x140/0x328
> [c007d2a270f0] [c01b4f88] swiotlb_full+0x108/0x130
> [c007d2a27180] [c01b5f6c] swiotlb_map_page+0x25c/0x2c0
> [c007d2a271e0] [c07bfaf8] vring_map_one_sg.isra.0+0x58/0x70
> [c007d2a27200] [c07c08dc] virtqueue_add_sgs+0x1bc/0x690
> [c007d2a272f0] [d42a1280] virtio_queue_rq+0x358/0x4a0 [virtio_blk]
> [c007d2a273d0] [c06b5d68] blk_mq_dispatch_rq_list+0x1f8/0x6d0
> ..
> 
> Increase the SWIOTLB size to 1GB on Ultravisor based secure guests.

Gosh, that is huge.

What about making the SWIOTLB more dynamic? That is, expand its size
dynamically if it can?
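
A minimal sketch of what "expand if it can" could look like - allocate a
further pool on demand and retry the mapping once. The swiotlb_pool_map()
and swiotlb_pool_grow() helpers are hypothetical names, not existing API:

static phys_addr_t swiotlb_map_or_grow(struct device *dev, phys_addr_t phys,
				       size_t size)
{
	phys_addr_t tlb_addr = swiotlb_pool_map(dev, phys, size);

	/* on exhaustion, try to grow the pool once and retry the mapping */
	if (tlb_addr == SWIOTLB_MAP_ERROR && swiotlb_pool_grow(SZ_64M) == 0)
		tlb_addr = swiotlb_pool_map(dev, phys, size);

	return tlb_addr;
}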

> 
> Signed-off-by: Anshuman Khandual 
> Signed-off-by: Thiago Jung Bauermann 
> ---
>  arch/powerpc/Kconfig | 5 +
>  kernel/dma/swiotlb.c | 5 +
>  2 files changed, 10 insertions(+)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 1466d1234723..fee7194ce9e4 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -457,6 +457,11 @@ config PPC_SVM
>  
>If unsure, say "N".
>  
> +config SWIOTLB_DEFAULT_SIZE
> +   int "Size of Software I/O TLB buffer (in MiB)"
> +   default "1024"
> +   depends on PPC_SVM
> +
>  config PPC_TRANSACTIONAL_MEM
> bool "Transactional Memory support for POWERPC"
> depends on PPC_BOOK3S_64
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 04b68d9dffac..32dc67422d8a 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -146,8 +146,13 @@ void swiotlb_set_max_segment(unsigned int val)
>   max_segment = rounddown(val, PAGE_SIZE);
>  }
>  
> +#ifdef CONFIG_SWIOTLB_DEFAULT_SIZE
> +#define IO_TLB_DEFAULT_SIZE ((unsigned long) CONFIG_SWIOTLB_DEFAULT_SIZE << 
> 20)
> +#else
>  /* default to 64MB */
>  #define IO_TLB_DEFAULT_SIZE (64UL<<20)
> +#endif
> +
>  unsigned long swiotlb_size_or_default(void)
>  {
>   unsigned long size;
> 


Re: [PATCH 05/20] swiotlb: allow the architecture to provide a get_required_mask hook

2018-08-27 Thread Konrad Rzeszutek Wilk
On Mon, Jul 30, 2018 at 06:38:09PM +0200, Christoph Hellwig wrote:
> For now this allows consolidating the powerpc code.  In the long run
> we should grow a generic implementation of dma_get_required_mask that
> returns the dma mask required to avoid bounce buffering.
> 
> Signed-off-by: Christoph Hellwig 
Reviewed-by: Konrad Rzeszutek Wilk 

Thank you!
> ---
>  kernel/dma/swiotlb.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 904541055792..1bb420244753 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -1084,5 +1084,9 @@ const struct dma_map_ops swiotlb_dma_ops = {
>   .map_page   = swiotlb_map_page,
>   .unmap_page = swiotlb_unmap_page,
>   .dma_supported  = dma_direct_supported,
> +#ifdef swiotlb_get_required_mask
> + .get_required_mask  = swiotlb_get_required_mask,
> +#endif
> +
>  };
>  EXPORT_SYMBOL(swiotlb_dma_ops);
> -- 
> 2.18.0
> 
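
For reference, a sketch of the generic implementation described above: the
smallest all-ones mask that covers the end of RAM, so the device can reach
every page directly and never needs to bounce. It is modeled on the powerpc
swiotlb_powerpc_get_required(); the function name and placement here are
assumptions:

static u64 swiotlb_get_required_mask_generic(struct device *dev)
{
	u64 end = memblock_end_of_DRAM();
	u64 mask = 1ULL << (fls64(end) - 1);

	/* widen to a contiguous all-ones mask */
	return mask + mask - 1;
}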


Re: [PATCH 06/22] swiotlb: rename swiotlb_free to swiotlb_exit

2018-01-12 Thread Konrad Rzeszutek Wilk
On Wed, Jan 10, 2018 at 09:09:16AM +0100, Christoph Hellwig wrote:

OK?

Reviewed-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
> Signed-off-by: Christoph Hellwig <h...@lst.de>
> ---
>  arch/powerpc/kernel/dma-swiotlb.c | 2 +-
>  arch/x86/kernel/pci-swiotlb.c | 2 +-
>  include/linux/swiotlb.h   | 4 ++--
>  lib/swiotlb.c | 2 +-
>  4 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/dma-swiotlb.c 
> b/arch/powerpc/kernel/dma-swiotlb.c
> index 506ac4fafac5..88f3963ca30f 100644
> --- a/arch/powerpc/kernel/dma-swiotlb.c
> +++ b/arch/powerpc/kernel/dma-swiotlb.c
> @@ -121,7 +121,7 @@ static int __init check_swiotlb_enabled(void)
>   if (ppc_swiotlb_enable)
>   swiotlb_print_info();
>   else
> - swiotlb_free();
> + swiotlb_exit();
>  
>   return 0;
>  }
> diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
> index 0d77603c2f50..0ee0f8f34251 100644
> --- a/arch/x86/kernel/pci-swiotlb.c
> +++ b/arch/x86/kernel/pci-swiotlb.c
> @@ -120,7 +120,7 @@ void __init pci_swiotlb_late_init(void)
>  {
>   /* An IOMMU turned us off. */
>   if (!swiotlb)
> - swiotlb_free();
> + swiotlb_exit();
>   else {
>   printk(KERN_INFO "PCI-DMA: "
>  "Using software bounce buffering for IO (SWIOTLB)\n");
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index 24ed817082ee..606375e35d87 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -115,10 +115,10 @@ extern int
>  swiotlb_dma_supported(struct device *hwdev, u64 mask);
>  
>  #ifdef CONFIG_SWIOTLB
> -extern void __init swiotlb_free(void);
> +extern void __init swiotlb_exit(void);
>  unsigned int swiotlb_max_segment(void);
>  #else
> -static inline void swiotlb_free(void) { }
> +static inline void swiotlb_exit(void) { }
>  static inline unsigned int swiotlb_max_segment(void) { return 0; }
>  #endif
>  
> diff --git a/lib/swiotlb.c b/lib/swiotlb.c
> index 125c1062119f..cf5311908fa9 100644
> --- a/lib/swiotlb.c
> +++ b/lib/swiotlb.c
> @@ -417,7 +417,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long 
> nslabs)
>   return -ENOMEM;
>  }
>  
> -void __init swiotlb_free(void)
> +void __init swiotlb_exit(void)
>  {
>   if (!io_tlb_orig_addr)
>   return;
> -- 
> 2.14.2
> 


Re: [PATCH 05/22] x86: rename swiotlb_dma_ops

2018-01-12 Thread Konrad Rzeszutek Wilk
On Wed, Jan 10, 2018 at 09:09:15AM +0100, Christoph Hellwig wrote:
> We'll need that name for a generic implementation soon.
> 
> Signed-off-by: Christoph Hellwig <h...@lst.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
> ---
>  arch/x86/kernel/pci-swiotlb.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
> index 9d3e35c33d94..0d77603c2f50 100644
> --- a/arch/x86/kernel/pci-swiotlb.c
> +++ b/arch/x86/kernel/pci-swiotlb.c
> @@ -48,7 +48,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t 
> size,
>   dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
>  }
>  
> -static const struct dma_map_ops swiotlb_dma_ops = {
> +static const struct dma_map_ops x86_swiotlb_dma_ops = {
>   .mapping_error = swiotlb_dma_mapping_error,
>   .alloc = x86_swiotlb_alloc_coherent,
>   .free = x86_swiotlb_free_coherent,
> @@ -112,7 +112,7 @@ void __init pci_swiotlb_init(void)
>  {
>   if (swiotlb) {
>   swiotlb_init(0);
> - dma_ops = &swiotlb_dma_ops;
> + dma_ops = &x86_swiotlb_dma_ops;
>   }
>  }
>  
> -- 
> 2.14.2
> 


Re: [PATCH 04/22] powerpc: rename swiotlb_dma_ops

2018-01-12 Thread Konrad Rzeszutek Wilk
On Wed, Jan 10, 2018 at 09:09:14AM +0100, Christoph Hellwig wrote:
> We'll need that name for a generic implementation soon.
> 
Reviewed-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
> Signed-off-by: Christoph Hellwig <h...@lst.de>
> ---
>  arch/powerpc/include/asm/swiotlb.h | 2 +-
>  arch/powerpc/kernel/dma-swiotlb.c  | 4 ++--
>  arch/powerpc/kernel/dma.c  | 2 +-
>  arch/powerpc/sysdev/fsl_pci.c  | 2 +-
>  4 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/swiotlb.h 
> b/arch/powerpc/include/asm/swiotlb.h
> index 9341ee804d19..f65ecf57b66c 100644
> --- a/arch/powerpc/include/asm/swiotlb.h
> +++ b/arch/powerpc/include/asm/swiotlb.h
> @@ -13,7 +13,7 @@
>  
>  #include 
>  
> -extern const struct dma_map_ops swiotlb_dma_ops;
> +extern const struct dma_map_ops powerpc_swiotlb_dma_ops;
>  
>  extern unsigned int ppc_swiotlb_enable;
>  int __init swiotlb_setup_bus_notifier(void);
> diff --git a/arch/powerpc/kernel/dma-swiotlb.c 
> b/arch/powerpc/kernel/dma-swiotlb.c
> index f1e99b9cee97..506ac4fafac5 100644
> --- a/arch/powerpc/kernel/dma-swiotlb.c
> +++ b/arch/powerpc/kernel/dma-swiotlb.c
> @@ -46,7 +46,7 @@ static u64 swiotlb_powerpc_get_required(struct device *dev)
>   * map_page, and unmap_page on highmem, use normal dma_ops
>   * for everything else.
>   */
> -const struct dma_map_ops swiotlb_dma_ops = {
> +const struct dma_map_ops powerpc_swiotlb_dma_ops = {
>   .alloc = __dma_nommu_alloc_coherent,
>   .free = __dma_nommu_free_coherent,
>   .mmap = dma_nommu_mmap_coherent,
> @@ -89,7 +89,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
>  
>   /* May need to bounce if the device can't address all of DRAM */
>   if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())
> - set_dma_ops(dev, &swiotlb_dma_ops);
> + set_dma_ops(dev, &powerpc_swiotlb_dma_ops);
>  
>   return NOTIFY_DONE;
>  }
> diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
> index 76079841d3d0..da20569de9d4 100644
> --- a/arch/powerpc/kernel/dma.c
> +++ b/arch/powerpc/kernel/dma.c
> @@ -33,7 +33,7 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev)
>   struct dev_archdata __maybe_unused *sd = >archdata;
>  
>  #ifdef CONFIG_SWIOTLB
> - if (sd->max_direct_dma_addr && dev->dma_ops == &swiotlb_dma_ops)
> + if (sd->max_direct_dma_addr && dev->dma_ops == &powerpc_swiotlb_dma_ops)
>   pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT);
>  #endif
>  
> diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
> index e4d0133bbeeb..61e07c78d64f 100644
> --- a/arch/powerpc/sysdev/fsl_pci.c
> +++ b/arch/powerpc/sysdev/fsl_pci.c
> @@ -118,7 +118,7 @@ static void setup_swiotlb_ops(struct pci_controller *hose)
>  {
>   if (ppc_swiotlb_enable) {
>   hose->controller_ops.dma_dev_setup = pci_dma_dev_setup_swiotlb;
> - set_pci_dma_ops(&swiotlb_dma_ops);
> + set_pci_dma_ops(&powerpc_swiotlb_dma_ops);
>   }
>  }
>  #else
> -- 
> 2.14.2
> 


Re: [PATCH 03/22] ia64: rename swiotlb_dma_ops

2018-01-12 Thread Konrad Rzeszutek Wilk
On Wed, Jan 10, 2018 at 09:09:13AM +0100, Christoph Hellwig wrote:
> We'll need that name for a generic implementation soon.
> 
> Signed-off-by: Christoph Hellwig <h...@lst.de>


Reviewed-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
> ---
>  arch/ia64/hp/common/hwsw_iommu.c | 4 ++--
>  arch/ia64/hp/common/sba_iommu.c  | 6 +++---
>  arch/ia64/kernel/pci-swiotlb.c   | 6 +++---
>  3 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/ia64/hp/common/hwsw_iommu.c 
> b/arch/ia64/hp/common/hwsw_iommu.c
> index 63d8e1d2477f..41279f0442bd 100644
> --- a/arch/ia64/hp/common/hwsw_iommu.c
> +++ b/arch/ia64/hp/common/hwsw_iommu.c
> @@ -19,7 +19,7 @@
>  #include 
>  #include 
>  
> -extern const struct dma_map_ops sba_dma_ops, swiotlb_dma_ops;
> +extern const struct dma_map_ops sba_dma_ops, ia64_swiotlb_dma_ops;
>  
>  /* swiotlb declarations & definitions: */
>  extern int swiotlb_late_init_with_default_size (size_t size);
> @@ -38,7 +38,7 @@ static inline int use_swiotlb(struct device *dev)
>  const struct dma_map_ops *hwsw_dma_get_ops(struct device *dev)
>  {
>   if (use_swiotlb(dev))
> - return &swiotlb_dma_ops;
> + return &ia64_swiotlb_dma_ops;
>   return &sba_dma_ops;
>  }
>  EXPORT_SYMBOL(hwsw_dma_get_ops);
> diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
> index aec4a3354abe..8c0a9ae6afec 100644
> --- a/arch/ia64/hp/common/sba_iommu.c
> +++ b/arch/ia64/hp/common/sba_iommu.c
> @@ -2096,7 +2096,7 @@ static int __init acpi_sba_ioc_init_acpi(void)
>  /* This has to run before acpi_scan_init(). */
>  arch_initcall(acpi_sba_ioc_init_acpi);
>  
> -extern const struct dma_map_ops swiotlb_dma_ops;
> +extern const struct dma_map_ops ia64_swiotlb_dma_ops;
>  
>  static int __init
>  sba_init(void)
> @@ -2111,7 +2111,7 @@ sba_init(void)
>* a successful kdump kernel boot is to use the swiotlb.
>*/
>   if (is_kdump_kernel()) {
> - dma_ops = &swiotlb_dma_ops;
> + dma_ops = &ia64_swiotlb_dma_ops;
>   if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
>   panic("Unable to initialize software I/O TLB:"
> " Try machvec=dig boot option");
> @@ -2133,7 +2133,7 @@ sba_init(void)
>* If we didn't find something sba_iommu can claim, we
>* need to setup the swiotlb and switch to the dig machvec.
>*/
> - dma_ops = &swiotlb_dma_ops;
> + dma_ops = &ia64_swiotlb_dma_ops;
>   if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
>   panic("Unable to find SBA IOMMU or initialize "
> "software I/O TLB: Try machvec=dig boot option");
> diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
> index 5e50939aa03e..f1ae873a8c35 100644
> --- a/arch/ia64/kernel/pci-swiotlb.c
> +++ b/arch/ia64/kernel/pci-swiotlb.c
> @@ -31,7 +31,7 @@ static void ia64_swiotlb_free_coherent(struct device *dev, 
> size_t size,
>   swiotlb_free_coherent(dev, size, vaddr, dma_addr);
>  }
>  
> -const struct dma_map_ops swiotlb_dma_ops = {
> +const struct dma_map_ops ia64_swiotlb_dma_ops = {
>   .alloc = ia64_swiotlb_alloc_coherent,
>   .free = ia64_swiotlb_free_coherent,
>   .map_page = swiotlb_map_page,
> @@ -48,7 +48,7 @@ const struct dma_map_ops swiotlb_dma_ops = {
>  
>  void __init swiotlb_dma_init(void)
>  {
> - dma_ops = &swiotlb_dma_ops;
> + dma_ops = &ia64_swiotlb_dma_ops;
>   swiotlb_init(1);
>  }
>  
> @@ -60,7 +60,7 @@ void __init pci_swiotlb_init(void)
>   printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
>   machvec_init("dig");
>   swiotlb_init(1);
> - dma_ops = &swiotlb_dma_ops;
> + dma_ops = &ia64_swiotlb_dma_ops;
>  #else
>   panic("Unable to find Intel IOMMU");
>  #endif
> -- 
> 2.14.2
> 


Re: [PATCH 21/33] dma-mapping: add an arch_dma_supported hook

2018-01-12 Thread Konrad Rzeszutek Wilk
On Wed, Jan 10, 2018 at 09:00:15AM +0100, Christoph Hellwig wrote:
> To implement the x86 forbid_dac and iommu_sac_force we want an arch hook
> so that it can apply the global options across all dma_map_ops
> implementations.
> 
> Signed-off-by: Christoph Hellwig <h...@lst.de>

Reviewed-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
> ---
>  arch/x86/include/asm/dma-mapping.h |  3 +++
>  arch/x86/kernel/pci-dma.c  | 19 ---
>  include/linux/dma-mapping.h| 11 +++
>  3 files changed, 26 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/include/asm/dma-mapping.h 
> b/arch/x86/include/asm/dma-mapping.h
> index dfdc9357a349..6277c83c0eb1 100644
> --- a/arch/x86/include/asm/dma-mapping.h
> +++ b/arch/x86/include/asm/dma-mapping.h
> @@ -30,6 +30,9 @@ static inline const struct dma_map_ops 
> *get_arch_dma_ops(struct bus_type *bus)
>   return dma_ops;
>  }
>  
> +int arch_dma_supported(struct device *dev, u64 mask);
> +#define arch_dma_supported arch_dma_supported
> +
>  bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
>  #define arch_dma_alloc_attrs arch_dma_alloc_attrs
>  
> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
> index 61a8f1cb3829..df7ab02f959f 100644
> --- a/arch/x86/kernel/pci-dma.c
> +++ b/arch/x86/kernel/pci-dma.c
> @@ -215,7 +215,7 @@ static __init int iommu_setup(char *p)
>  }
>  early_param("iommu", iommu_setup);
>  
> -int x86_dma_supported(struct device *dev, u64 mask)
> +int arch_dma_supported(struct device *dev, u64 mask)
>  {
>  #ifdef CONFIG_PCI
>   if (mask > 0x && forbid_dac > 0) {
> @@ -224,12 +224,6 @@ int x86_dma_supported(struct device *dev, u64 mask)
>   }
>  #endif
>  
> - /* Copied from i386. Doesn't make much sense, because it will
> -only work for pci_alloc_coherent.
> -The caller just has to use GFP_DMA in this case. */
> - if (mask < DMA_BIT_MASK(24))
> - return 0;
> -
>   /* Tell the device to use SAC when IOMMU force is on.  This
>  allows the driver to use cheaper accesses in some cases.
>  
> @@ -249,6 +243,17 @@ int x86_dma_supported(struct device *dev, u64 mask)
>  
>   return 1;
>  }
> +EXPORT_SYMBOL(arch_dma_supported);
> +
> +int x86_dma_supported(struct device *dev, u64 mask)
> +{
> + /* Copied from i386. Doesn't make much sense, because it will
> +only work for pci_alloc_coherent.
> +The caller just has to use GFP_DMA in this case. */
> + if (mask < DMA_BIT_MASK(24))
> + return 0;
> + return 1;
> +}
>  
>  static int __init pci_iommu_init(void)
>  {
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 88bcb1a8211d..d67742dad904 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -576,6 +576,14 @@ static inline int dma_mapping_error(struct device *dev, 
> dma_addr_t dma_addr)
>   return 0;
>  }
>  
> +/*
> + * This is a hack for the legacy x86 forbid_dac and iommu_sac_force. Please
> + * don't use this is new code.
> + */
> +#ifndef arch_dma_supported
> +#define arch_dma_supported(dev, mask)(1)
> +#endif
> +
>  static inline void dma_check_mask(struct device *dev, u64 mask)
>  {
>   if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1)))
> @@ -588,6 +596,9 @@ static inline int dma_supported(struct device *dev, u64 
> mask)
>  
>   if (!ops)
>   return 0;
> + if (!arch_dma_supported(dev, mask))
> + return 0;
> +
>   if (!ops->dma_supported)
>   return 1;
>   return ops->dma_supported(dev, mask);
> -- 
> 2.14.2
> 


Re: [PATCH 19/33] dma-mapping: warn when there is no coherent_dma_mask

2018-01-12 Thread Konrad Rzeszutek Wilk
On Wed, Jan 10, 2018 at 09:00:13AM +0100, Christoph Hellwig wrote:
> These days all devices should have a DMA coherent mask, and most dma_ops
> implementations rely on that fact.  But just to be sure add an assert to
> ring the warning bell if that is not the case.
> 
> Signed-off-by: Christoph Hellwig <h...@lst.de>

Reviewed-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
> ---
>  include/linux/dma-mapping.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index d84951865be7..9f28b2fa329e 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -513,6 +513,7 @@ static inline void *dma_alloc_attrs(struct device *dev, 
> size_t size,
>   void *cpu_addr;
>  
>   BUG_ON(!ops);
> + WARN_ON_ONCE(!dev->coherent_dma_mask);
>  
>   if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
>   return cpu_addr;
> -- 
> 2.14.2
> 


Re: [PATCH 08/44] xen-swiotlb: implement ->mapping_error

2017-06-10 Thread Konrad Rzeszutek Wilk
On Thu, Jun 08, 2017 at 03:25:33PM +0200, Christoph Hellwig wrote:
> DMA_ERROR_CODE is going to go away, so don't rely on it.

Reviewed-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>


Re: [PATCH 07/44] xen-swiotlb: consolidate xen_swiotlb_dma_ops

2017-06-10 Thread Konrad Rzeszutek Wilk
On Thu, Jun 08, 2017 at 03:25:32PM +0200, Christoph Hellwig wrote:
> ARM and x86 had duplicated versions of the dma_ops structure, the
> only difference is that x86 hasn't wired up the set_dma_mask,
> mmap, and get_sgtable ops yet.  On x86 all of them are identical
> to the generic version, so they aren't needed but harmless.
> 
> All the symbols used only for xen_swiotlb_dma_ops can now be marked
> static as well.
> 
> Signed-off-by: Christoph Hellwig <h...@lst.de>
> ---
>  arch/arm/xen/mm.c  | 17 
>  arch/x86/xen/pci-swiotlb-xen.c | 14 ---
>  drivers/xen/swiotlb-xen.c  | 93 
> ++
>  include/xen/swiotlb-xen.h  | 62 +---
>  4 files changed, 49 insertions(+), 137 deletions(-)

Yeeey!

Reviewed-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
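
For reference, the consolidated table in drivers/xen/swiotlb-xen.c ends up
looking roughly like this (abridged; field list from memory of that series,
so treat it as approximate):

const struct dma_map_ops xen_swiotlb_dma_ops = {
	.alloc			= xen_swiotlb_alloc_coherent,
	.free			= xen_swiotlb_free_coherent,
	.map_page		= xen_swiotlb_map_page,
	.unmap_page		= xen_swiotlb_unmap_page,
	.map_sg			= xen_swiotlb_map_sg_attrs,
	.unmap_sg		= xen_swiotlb_unmap_sg_attrs,
	.sync_single_for_cpu	= xen_swiotlb_sync_single_for_cpu,
	.sync_single_for_device	= xen_swiotlb_sync_single_for_device,
	.dma_supported		= xen_swiotlb_dma_supported,
	.set_dma_mask		= xen_swiotlb_set_dma_mask,
};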


Re: [Xen-devel] [PATCH v6 10/11] x86, xen: support vcpu preempted check

2016-10-28 Thread Konrad Rzeszutek Wilk
On Fri, Oct 28, 2016 at 04:11:26AM -0400, Pan Xinhui wrote:
> From: Juergen Gross 
> 
> Support the vcpu_is_preempted() functionality under Xen. This will
> enhance lock performance on overcommitted hosts (more runnable vcpus
> than physical cpus in the system) as doing busy waits for preempted
> vcpus will hurt system performance far worse than early yielding.
> 
> A quick test (4 vcpus on 1 physical cpu doing a parallel build job
> with "make -j 8") reduced system time by about 5% with this patch.
> 
> Signed-off-by: Juergen Gross 
> Signed-off-by: Pan Xinhui 
> ---
>  arch/x86/xen/spinlock.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
> index 3d6e006..74756bb 100644
> --- a/arch/x86/xen/spinlock.c
> +++ b/arch/x86/xen/spinlock.c
> @@ -114,7 +114,6 @@ void xen_uninit_lock_cpu(int cpu)
>   per_cpu(irq_name, cpu) = NULL;
>  }
>  
> -

Spurious change.
>  /*
>   * Our init of PV spinlocks is split in two init functions due to us
>   * using paravirt patching and jump labels patching and having to do
> @@ -137,6 +136,8 @@ void __init xen_init_spinlocks(void)
>   pv_lock_ops.queued_spin_unlock = 
> PV_CALLEE_SAVE(__pv_queued_spin_unlock);
>   pv_lock_ops.wait = xen_qlock_wait;
>   pv_lock_ops.kick = xen_qlock_kick;
> +
> + pv_lock_ops.vcpu_is_preempted = xen_vcpu_stolen;
>  }
>  
>  /*
> -- 
> 2.4.11
> 
> 
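
The payoff is in the generic spin loops. Condensed, the pattern the series
adds to the mutex/rwsem/osq spinners looks like this (paraphrased, not an
exact hunk from the series):

	while (owner_still_running(lock, owner)) {
		/* holder's vcpu was preempted: spinning is wasted work */
		if (need_resched() || vcpu_is_preempted(task_cpu(owner)))
			break;
		cpu_relax();
	}

and the Xen hook bound above, xen_vcpu_stolen(), just checks the runstate
area shared with the hypervisor:

	bool xen_vcpu_stolen(int vcpu)
	{
		return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
	}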


Re: [Xen-devel] [PATCH v6 00/11] implement vcpu preempted check

2016-10-28 Thread Konrad Rzeszutek Wilk
On Fri, Oct 28, 2016 at 04:11:16AM -0400, Pan Xinhui wrote:
> change from v5:
>   spilt x86/kvm patch into guest/host part.
>   introduce kvm_write_guest_offset_cached.
>   fix some typos.
>   rebase patch onto 4.9.2
> change from v4:
>   spilt x86 kvm vcpu preempted check into two patches.
>   add documentation patch.
>   add x86 vcpu preempted check patch under xen
>   add s390 vcpu preempted check patch 
> change from v3:
>   add x86 vcpu preempted check patch
> change from v2:
>   no code change, fix typos, update some comments
> change from v1:
>   a simplier definition of default vcpu_is_preempted
>   skip mahcine type check on ppc, and add config. remove dedicated macro.
>   add one patch to drop overload of rwsem_spin_on_owner and 
> mutex_spin_on_owner. 
>   add more comments
>   thanks boqun and Peter's suggestion.
> 
> This patch set aims to fix lock holder preemption issues.

Do you have a git tree with these patches?

> 
> test-case:
> perf record -a perf bench sched messaging -g 400 -p && perf report
> 
> 18.09%  sched-messaging  [kernel.vmlinux]  [k] osq_lock
> 12.28%  sched-messaging  [kernel.vmlinux]  [k] rwsem_spin_on_owner
>  5.27%  sched-messaging  [kernel.vmlinux]  [k] mutex_unlock
>  3.89%  sched-messaging  [kernel.vmlinux]  [k] wait_consider_task
>  3.64%  sched-messaging  [kernel.vmlinux]  [k] _raw_write_lock_irq
>  3.41%  sched-messaging  [kernel.vmlinux]  [k] mutex_spin_on_owner.is
>  2.49%  sched-messaging  [kernel.vmlinux]  [k] system_call
> 
> We introduce interface bool vcpu_is_preempted(int cpu) and use it in some spin
> loops of osq_lock, rwsem_spin_on_owner and mutex_spin_on_owner.
> These spin_on_onwer variant also cause rcu stall before we apply this patch 
> set
> 
> We also have observed some performace improvements in uninx benchmark tests.
> 
> PPC test result:
> 1 copy - 0.94%
> 2 copy - 7.17%
> 4 copy - 11.9%
> 8 copy -  3.04%
> 16 copy - 15.11%
> 
> details below:
> Without patch:
> 
> 1 copy - File Write 4096 bufsize 8000 maxblocks  2188223.0 KBps  (30.0 s, 
> 1 samples)
> 2 copy - File Write 4096 bufsize 8000 maxblocks  1804433.0 KBps  (30.0 s, 
> 1 samples)
> 4 copy - File Write 4096 bufsize 8000 maxblocks  1237257.0 KBps  (30.0 s, 
> 1 samples)
> 8 copy - File Write 4096 bufsize 8000 maxblocks  1032658.0 KBps  (30.0 s, 
> 1 samples)
> 16 copy - File Write 4096 bufsize 8000 maxblocks   768000.0 KBps  (30.1 
> s, 1 samples)
> 
> With patch: 
> 
> 1 copy - File Write 4096 bufsize 8000 maxblocks  2209189.0 KBps  (30.0 s, 
> 1 samples)
> 2 copy - File Write 4096 bufsize 8000 maxblocks  1943816.0 KBps  (30.0 s, 
> 1 samples)
> 4 copy - File Write 4096 bufsize 8000 maxblocks  1405591.0 KBps  (30.0 s, 
> 1 samples)
> 8 copy - File Write 4096 bufsize 8000 maxblocks  1065080.0 KBps  (30.0 s, 
> 1 samples)
> 16 copy - File Write 4096 bufsize 8000 maxblocks   904762.0 KBps  (30.0 
> s, 1 samples)
> 
> X86 test result:
>   test-case   after-patch   before-patch
> Execl Throughput   |18307.9 lps  |11701.6 lps 
> File Copy 1024 bufsize 2000 maxblocks  |  1352407.3 KBps |   790418.9 KBps
> File Copy 256 bufsize 500 maxblocks|   367555.6 KBps |   222867.7 KBps
> File Copy 4096 bufsize 8000 maxblocks  |  3675649.7 KBps |  1780614.4 KBps
> Pipe Throughput| 11872208.7 lps  | 11855628.9 lps 
> Pipe-based Context Switching   |  1495126.5 lps  |  1490533.9 lps 
> Process Creation   |29881.2 lps  |28572.8 lps 
> Shell Scripts (1 concurrent)   |23224.3 lpm  |22607.4 lpm 
> Shell Scripts (8 concurrent)   | 3531.4 lpm  | 3211.9 lpm 
> System Call Overhead   | 10385653.0 lps  | 10419979.0 lps 
> 
> Christian Borntraeger (1):
>   s390/spinlock: Provide vcpu_is_preempted
> 
> Juergen Gross (1):
>   x86, xen: support vcpu preempted check
> 
> Pan Xinhui (9):
>   kernel/sched: introduce vcpu preempted check interface
>   locking/osq: Drop the overload of osq_lock()
>   kernel/locking: Drop the overload of {mutex,rwsem}_spin_on_owner
>   powerpc/spinlock: support vcpu preempted check
>   x86, paravirt: Add interface to support kvm/xen vcpu preempted check
>   KVM: Introduce kvm_write_guest_offset_cached
>   x86, kvm/x86.c: support vcpu preempted check
>   x86, kernel/kvm.c: support vcpu preempted check
>   Documentation: virtual: kvm: Support vcpu preempted check
> 
>  Documentation/virtual/kvm/msr.txt |  9 -
>  arch/powerpc/include/asm/spinlock.h   |  8 
>  arch/s390/include/asm/spinlock.h  |  8 
>  arch/s390/kernel/smp.c|  9 +++--
>  arch/s390/lib/spinlock.c  | 25 -
>  arch/x86/include/asm/paravirt_types.h |  2 ++
>  arch/x86/include/asm/spinlock.h   |  8 
>  arch/x86/include/uapi/asm/kvm_para.h  |  4 +++-
>  

Re: [PATCH 2/3] swiotlb: prefix dma_to_phys and phys_to_dma functions

2016-03-28 Thread Konrad Rzeszutek Wilk
On Fri, Mar 18, 2016 at 11:00 AM, Sinan Kaya  wrote:
> On 3/18/2016 8:12 AM, Robin Murphy wrote:
>> Since we know for sure that swiotlb_to_phys is a no-op on arm64, it might be 
>> cleaner to simply not reference it at all. I suppose we could have some 
>> private local wrappers, e.g.:
>>
>> #define swiotlb_to_virt(addr) phys_to_virt((phys_addr_t)(addr))
>>
>> to keep the intent of the code clear (and just in case anyone ever builds a 
>> system mad enough to warrant switching out that definition, but I'd hope 
>> that never happens).
>>
>> Otherwise, looks good - thanks for doing this!
>
> OK. I added this. Reviewed-by?
>
> I'm not happy to submit such a big patch for all different ARCHs. I couldn't
> find a cleaner solution. I'm willing to split this patch into multiple if 
> there
> is a better way.
>
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index ada00c3..8c0f66b 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -29,6 +29,14 @@
>
>  #include 
>
> +/*
> + * If you are building a system without IOMMU, then you are using SWIOTLB
> + * library. The ARM64 adaptation of this library does not support address
> + * translation and it assumes that physical address = dma address for such
> + * a use case. Please don't build a platform that violates this.
> + */

Why not just expand the ARM64 part to support address translation?

> +#define swiotlb_to_virt(addr) phys_to_virt((phys_addr_t)(addr))
> +

Adding Stefano here.

>  static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
>  bool coherent)
>  {
> @@ -188,7 +196,7 @@ static void __dma_free(struct device *dev, size_t size,
>void *vaddr, dma_addr_t dma_handle,
>struct dma_attrs *attrs)
>  {
> -   void *swiotlb_addr = phys_to_virt(swiotlb_dma_to_phys(dev, 
> dma_handle));
> +   void *swiotlb_addr = swiotlb_to_virt(dma_handle);
>
> size = PAGE_ALIGN(size);
>
> @@ -209,8 +217,7 @@ static dma_addr_t __swiotlb_map_page(struct device *dev, 
> struct page *page,
>
> dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
> if (!is_device_dma_coherent(dev))
> -   __dma_map_area(phys_to_virt(swiotlb_dma_to_phys(dev, 
> dev_addr)),
> -  size, dir);
> +   __dma_map_area(swiotlb_to_virt(dev_addr), size, dir);
>
> return dev_addr;
>  }
> @@ -283,8 +290,7 @@ static void __swiotlb_sync_single_for_device(struct 
> device *dev,
>  {
> swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
> if (!is_device_dma_coherent(dev))
> -   __dma_map_area(phys_to_virt(swiotlb_dma_to_phys(dev, 
> dev_addr)),
> -  size, dir);
> +   __dma_map_area(swiotlb_to_virt(dev_addr), size, dir);
>  }
>
>
>
>
> --
> Sinan Kaya
> Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
> Foundation Collaborative Project

Re: [PATCH v2 00/22] Use MSI chip framework to configure MSI/MSI-X in all platforms

2014-09-25 Thread Konrad Rzeszutek Wilk
On Thu, Sep 25, 2014 at 11:14:10AM +0800, Yijing Wang wrote:
 This series is based Bjorn's pci/msi branch
 git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git pci/msi

Is there a git tree for these patches?
 
 Currently, there are a lot of weak arch functions in MSI code.
 Thierry Reding Introduced MSI chip framework to configure MSI/MSI-X in arm.
 This series use MSI chip framework to refactor MSI code across all platforms
 to eliminate weak arch functions. Then all MSI irqs will be managed in a 
 unified framework. Because this series changed a lot of ARCH MSI code,
 so tests in the platforms which MSI code modified are warmly welcomed!
 
 v1-v2:
 Add a patch to make s390 MSI code build happy between patch x86/xen/MSI: E..
 and s390/MSI: Use MSI... Fix several typo problems found by Lucas.
 
 RFC-v1: 
 Updated [patch 4/21] x86/xen/MSI: Eliminate..., export msi_chip instead
 of #ifdef to fix MSI bug in xen running in x86. 
 Rename arch_get_match_msi_chip() to arch_find_msi_chip().
 Drop use struct device as the msi_chip argument, we will do that
 later in another patchset.
 
 Yijing Wang (22):
   PCI/MSI: Clean up struct msi_chip argument
   PCI/MSI: Remove useless bus-msi assignment
   MSI: Remove the redundant irq_set_chip_data()
   x86/xen/MSI: Eliminate arch_msix_mask_irq() and arch_msi_mask_irq()
   s390/MSI: Use __msi_mask_irq() instead of default_msi_mask_irq()
   PCI/MSI: Introduce weak arch_find_msi_chip() to find MSI chip
   PCI/MSI: Refactor struct msi_chip to make it become more common
   x86/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   x86/xen/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   Irq_remapping/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   x86/MSI: Remove unused MSI weak arch functions
   MIPS/Octeon/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   MIPS/Xlp: Remove the dead function destroy_irq() to fix build error
   MIPS/Xlp/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   MIPS/Xlr/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   Powerpc/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   s390/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   arm/iop13xx/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   IA64/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   Sparc/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   tile/MSI: Use MSI chip framework to configure MSI/MSI-X irq
   PCI/MSI: Clean up unused MSI arch functions
 
  arch/arm/mach-iop13xx/include/mach/pci.h |2 +
  arch/arm/mach-iop13xx/iq81340mc.c|1 +
  arch/arm/mach-iop13xx/iq81340sc.c|1 +
  arch/arm/mach-iop13xx/msi.c  |9 ++-
  arch/arm/mach-iop13xx/pci.c  |6 ++
  arch/ia64/kernel/msi_ia64.c  |   18 -
  arch/mips/pci/msi-octeon.c   |   35 ++
  arch/mips/pci/msi-xlp.c  |   18 --
  arch/mips/pci/pci-xlr.c  |   15 -
  arch/powerpc/kernel/msi.c|   14 +++-
  arch/s390/pci/pci.c  |   18 -
  arch/sparc/kernel/pci.c  |   14 +++-
  arch/tile/kernel/pci_gx.c|   14 +++-
  arch/x86/include/asm/apic.h  |4 +
  arch/x86/include/asm/pci.h   |4 +-
  arch/x86/include/asm/x86_init.h  |7 --
  arch/x86/kernel/apic/io_apic.c   |   16 -
  arch/x86/kernel/x86_init.c   |   34 -
  arch/x86/pci/xen.c   |   60 +---
  drivers/iommu/irq_remapping.c|9 ++-
  drivers/irqchip/irq-armada-370-xp.c  |8 +--
  drivers/pci/host/pci-tegra.c |8 ++-
  drivers/pci/host/pcie-designware.c   |4 +-
  drivers/pci/host/pcie-rcar.c |8 ++-
  drivers/pci/msi.c|  114 
 ++
  drivers/pci/probe.c  |1 -
  include/linux/msi.h  |   26 ++-
  27 files changed, 266 insertions(+), 202 deletions(-)
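
For background, the msi_chip structure the series builds on, roughly as it
stood in include/linux/msi.h at the time (abridged; the plural
setup_irqs/teardown_irqs hooks are what this series introduces):

struct msi_chip {
	struct module *owner;
	struct device *dev;
	struct device_node *of_node;
	struct list_head list;

	int (*setup_irq)(struct msi_chip *chip, struct pci_dev *dev,
			 struct msi_desc *desc);
	void (*teardown_irq)(struct msi_chip *chip, unsigned int irq);
};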
 

Re: [PATCH v2 04/22] x86/xen/MSI: Eliminate arch_msix_mask_irq() and arch_msi_mask_irq()

2014-09-25 Thread Konrad Rzeszutek Wilk
On Thu, Sep 25, 2014 at 11:14:14AM +0800, Yijing Wang wrote:
 Commit 0e4ccb150 added two __weak arch functions arch_msix_mask_irq()
 and arch_msi_mask_irq() to fix a bug found when running xen in x86.
 Introduced these two funcntions make MSI code complex. And mask/unmask

These two functions made the MSI code more complex.
 is the irq actions related to interrupt controller, should not use
 weak arch functions to override mask/unmask interfaces. This patch

I am a bit baffled by what you are saying.
 reverted commit 0e4ccb150 and export struct irq_chip msi_chip, modify
 msi_chip-irq_mask/irq_unmask() in xen init functions to fix this
 bug for simplicity. Also this is preparation for using struct
 msi_chip instead of weak arch MSI functions in all platforms.
 Keep default_msi_mask_irq() and default_msix_mask_irq() in
 linux/msi.h to make s390 MSI code compile happy, they wiil be removed

s/wiil/will.

 in the later patch.
 
 Tested-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com

I don't even remember testing it - I guess I did the earlier version.

So a couple of questions since I've totally forgotten about this:

 diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
 index 50f67a3..5f8f3af 100644
 --- a/drivers/pci/msi.c
 +++ b/drivers/pci/msi.c
 @@ -162,7 +162,7 @@ static inline __attribute_const__ u32 msi_mask(unsigned x)
   * reliably as devices without an INTx disable bit will then generate a
   * level IRQ which will never be cleared.
   */
 -u32 default_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
 +u32 __msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
  {
   u32 mask_bits = desc->masked;
  
 @@ -176,14 +176,9 @@ u32 default_msi_mask_irq(struct msi_desc *desc, u32 
 mask, u32 flag)
   return mask_bits;
  }
  
 -__weak u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
 -{
 - return default_msi_mask_irq(desc, mask, flag);
 -}
 -
  static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
  {
  - desc->masked = arch_msi_mask_irq(desc, mask, flag);
  + desc->masked = __msi_mask_irq(desc, mask, flag);
  }
  
  /*
 @@ -193,7 +188,7 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, 
 u32 flag)
   * file.  This saves a few milliseconds when initialising devices with lots
   * of MSI-X interrupts.
   */
 -u32 default_msix_mask_irq(struct msi_desc *desc, u32 flag)
 +u32 __msix_mask_irq(struct msi_desc *desc, u32 flag)
  {
   u32 mask_bits = desc->masked;
   unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
 @@ -206,14 +201,9 @@ u32 default_msix_mask_irq(struct msi_desc *desc, u32 
 flag)
   return mask_bits;
  }
  
 -__weak u32 arch_msix_mask_irq(struct msi_desc *desc, u32 flag)
 -{
 - return default_msix_mask_irq(desc, flag);
 -}
 -
  static void msix_mask_irq(struct msi_desc *desc, u32 flag)
  {
  - desc->masked = arch_msix_mask_irq(desc, flag);
  + desc->masked = __msix_mask_irq(desc, flag);
  }
  
  static void msi_set_mask_bit(struct irq_data *data, u32 flag)
 @@ -852,7 +842,7 @@ void pci_msi_shutdown(struct pci_dev *dev)
   /* Return the device with MSI unmasked as initial states */
   mask = msi_mask(desc->msi_attrib.multi_cap);
   /* Keep cached state to be restored */
 - arch_msi_mask_irq(desc, mask, ~mask);
 + __msi_mask_irq(desc, mask, ~mask);

If I am reading this right, it will call the old 'default_msi_mask_irq'
which is exactly what we don't want to do under Xen. We want to call
the NOP code.
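
For reference, the Xen no-op in question is trivial - since the hypervisor
owns the MSI mask bits for PV guests, the hook simply declines to touch the
hardware. Roughly, following the irq_chip-style binding seen later in this
thread (treat the exact name and signature as an assumption):

static void xen_nop_msi_mask(struct irq_data *data)
{
	/* the hypervisor manages MSI masking for PV guests */
}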
  
   /* Restore dev->irq to its default pin-assertion irq */
   dev->irq = desc->msi_attrib.default_irq;
 @@ -950,7 +940,7 @@ void pci_msix_shutdown(struct pci_dev *dev)
   /* Return the device with MSI-X masked as initial states */
   list_for_each_entry(entry, &dev->msi_list, list) {
   /* Keep cached states to be restored */
 - arch_msix_mask_irq(entry, 1);
 + __msix_mask_irq(entry, 1);

Ditto here.

Looking more at this code I have to retract my Reviewed-by and Tested-by
on the whole series.

Is it possible to get a git tree for this please?

   }
  
   msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
 diff --git a/include/linux/msi.h b/include/linux/msi.h
 index 45ef8cb..cc46a62 100644
 --- a/include/linux/msi.h
 +++ b/include/linux/msi.h
 @@ -19,6 +19,8 @@ void read_msi_msg(struct msi_desc *entry, struct msi_msg 
 *msg);
  void get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
  void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
  void write_msi_msg(unsigned int irq, struct msi_msg *msg);
 +u32 __msix_mask_irq(struct msi_desc *desc, u32 flag);
 +u32 __msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag);
  
  struct msi_desc {
   struct {
 @@ -59,8 +61,8 @@ void arch_restore_msi_irqs(struct pci_dev *dev);
  
  void default_teardown_msi_irqs(struct pci_dev *dev);
  void default_restore_msi_irqs(struct pci_dev *dev);
 -u32 default_msi_mask_irq(struct msi_desc *desc, u32 mask, u32

Re: [Xen-devel] [PATCH v1 08/21] x86/xen/MSI: Use MSI chip framework to configure MSI/MSI-X irq

2014-09-10 Thread Konrad Rzeszutek Wilk
On Wed, Sep 10, 2014 at 01:38:25PM +0100, David Vrabel wrote:
 On 09/09/14 03:06, Yijing Wang wrote:
  On 2014/9/5 22:29, David Vrabel wrote:
  On 05/09/14 11:09, Yijing Wang wrote:
  Use MSI chip framework instead of arch MSI functions to configure
  MSI/MSI-X irq. So we can manage MSI/MSI-X irq in a unified framework.
  [...]
  --- a/arch/x86/pci/xen.c
  +++ b/arch/x86/pci/xen.c
  [...]
  @@ -418,9 +430,9 @@ int __init pci_xen_init(void)
   #endif
   
   #ifdef CONFIG_PCI_MSI
  - x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
  - x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
  - x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
  + xen_msi_chip.setup_irqs = xen_setup_msi_irqs;
  + xen_msi_chip.teardown_irqs = xen_teardown_msi_irqs;
  + x86_msi_chip = xen_msi_chip;
msi_chip.irq_mask = xen_nop_msi_mask;
msi_chip.irq_unmask = xen_nop_msi_mask;
 
  Why have these not been changed to set the x86_msi_chip.mask/unmask
  fields instead?
  
  Hi David, x86_msi_chip here is struct msi_chip data type, used to configure 
  MSI/MSI-X
  irq. msi_chip above is struct irq_chip data type, represent the MSI irq 
  controller. They are
  not the same object. Their name easily confusing people.
 
 Ok, it all makes sense now.
 
 Acked-by: David Vrabel david.vra...@citrix.com

You can also add 'Tested-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com'

on the whole series - I ran it through on Xen and on baremetal with a mix of 
32/64 builds.

Oh, and also Reviewed-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com the 
Xen parts.

 
 David
 

Re: [RFC PATCH 07/20] x86/MSI: Use msi_chip instead of arch func to configure MSI/MSI-X

2014-08-12 Thread Konrad Rzeszutek Wilk
On Tue, Aug 12, 2014 at 03:26:00PM +0800, Yijing Wang wrote:
 Introduce a new struct msi_chip apic_msi_chip instead of weak arch
 functions to configure MSI/MSI-X in x86.

Why not 'x86_msi_ops' (see  arch/x86/kernel/x86_init.c)
 
 Signed-off-by: Yijing Wang wangyij...@huawei.com
 ---
  arch/x86/include/asm/pci.h |1 +
  arch/x86/kernel/apic/io_apic.c |   20 
  2 files changed, 17 insertions(+), 4 deletions(-)
 
 diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
 index 0892ea0..878a06d 100644
 --- a/arch/x86/include/asm/pci.h
 +++ b/arch/x86/include/asm/pci.h
 @@ -101,6 +101,7 @@ void native_teardown_msi_irq(unsigned int irq);
  void native_restore_msi_irqs(struct pci_dev *dev);
  int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 unsigned int irq_base, unsigned int irq_offset);
 +extern struct msi_chip *x86_msi_chip;
  #else
  #define native_setup_msi_irqsNULL
  #define native_teardown_msi_irq  NULL
 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
 index 2609dcd..eb8ab7c 100644
 --- a/arch/x86/kernel/apic/io_apic.c
 +++ b/arch/x86/kernel/apic/io_apic.c
 @@ -3077,24 +3077,25 @@ int setup_msi_irq(struct pci_dev *dev, struct 
 msi_desc *msidesc,
   return 0;
  }
  
 -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 +int native_setup_msi_irqs(struct device *dev, int nvec, int type)
  {
   struct msi_desc *msidesc;
   unsigned int irq;
   int node, ret;
 + struct pci_dev *pdev = to_pci_dev(dev);
  
   /* Multiple MSI vectors only supported with interrupt remapping */
   if (type == PCI_CAP_ID_MSI && nvec > 1)
   return 1;
  
  - node = dev_to_node(&dev->dev);
 + node = dev_to_node(dev);
  
  - list_for_each_entry(msidesc, &dev->msi_list, list) {
  + list_for_each_entry(msidesc, &pdev->msi_list, list) {
   irq = irq_alloc_hwirq(node);
   if (!irq)
   return -ENOSPC;
  
 - ret = setup_msi_irq(dev, msidesc, irq, 0);
 + ret = setup_msi_irq(pdev, msidesc, irq, 0);
   if (ret < 0) {
   irq_free_hwirq(irq);
   return ret;
 @@ -3214,6 +3215,17 @@ int default_setup_hpet_msi(unsigned int irq, unsigned 
 int id)
  }
  #endif
  
 +struct msi_chip apic_msi_chip = {
 + .setup_irqs = native_setup_msi_irqs,
 + .teardown_irq = native_teardown_msi_irq,
 +};
 +
 +struct msi_chip *arch_get_match_msi_chip(struct device *dev)
 +{
 + return x86_msi_chip;
 +}
 +
  +struct msi_chip *x86_msi_chip = &apic_msi_chip;
  #endif /* CONFIG_PCI_MSI */
  /*
   * Hypertransport interrupt support
 -- 
 1.7.1
 

Re: [PATCH v5 29/45] x86/xen: Use get/put_online_cpus_atomic() to prevent CPU offline

2013-02-19 Thread Konrad Rzeszutek Wilk
On Tue, Jan 22, 2013 at 01:10:51PM +0530, Srivatsa S. Bhat wrote:
 Once stop_machine() is gone from the CPU offline path, we won't be able to
 depend on preempt_disable() or local_irq_disable() to prevent CPUs from
 going offline from under us.
 
 Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
 while invoking from atomic context.
 
 Cc: Konrad Rzeszutek Wilk konrad.w...@oracle.com

Weird. I see this in the patch but I don't see it in the header? Did you
explicitly suppress the CC part?


Anyhow, the patch looks sane enough, though I need to run it through
a test framework just to be on the safe side.

 Cc: Jeremy Fitzhardinge jer...@goop.org
 Cc: H. Peter Anvin h...@zytor.com
 Cc: x...@kernel.org
 Cc: xen-de...@lists.xensource.com
 Cc: virtualizat...@lists.linux-foundation.org
 Signed-off-by: Srivatsa S. Bhat srivatsa.b...@linux.vnet.ibm.com
 ---
 
  arch/x86/xen/mmu.c |   11 +--
  arch/x86/xen/smp.c |9 +
  2 files changed, 18 insertions(+), 2 deletions(-)
 
 diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
 index 01de35c..6a95a15 100644
 --- a/arch/x86/xen/mmu.c
 +++ b/arch/x86/xen/mmu.c
 @@ -39,6 +39,7 @@
   * Jeremy Fitzhardinge jer...@xensource.com, XenSource Inc, 2007
   */
  #include linux/sched.h
 +#include linux/cpu.h
  #include linux/highmem.h
  #include linux/debugfs.h
  #include linux/bug.h
 @@ -1163,9 +1164,13 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
   */
  static void xen_exit_mmap(struct mm_struct *mm)
  {
 - get_cpu();  /* make sure we don't move around */
 + /*
 +  * Make sure we don't move around, and prevent CPUs from going
 +  * offline.
 +  */
 + get_online_cpus_atomic();
   xen_drop_mm_ref(mm);
 - put_cpu();
 + put_online_cpus_atomic();
  
   spin_lock(&mm->page_table_lock);
  
 @@ -1371,6 +1376,7 @@ static void xen_flush_tlb_others(const struct cpumask 
 *cpus,
   args->op.arg2.vcpumask = to_cpumask(args->mask);
  
   /* Remove us, and any offline CPUS. */
 + get_online_cpus_atomic();
   cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
   cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
  
 @@ -1383,6 +1389,7 @@ static void xen_flush_tlb_others(const struct cpumask 
 *cpus,
   MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
  
   xen_mc_issue(PARAVIRT_LAZY_MMU);
 + put_online_cpus_atomic();
  }
  
  static unsigned long xen_read_cr3(void)
 diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
 index 4f7d259..7d753ae 100644
 --- a/arch/x86/xen/smp.c
 +++ b/arch/x86/xen/smp.c
 @@ -16,6 +16,7 @@
  #include linux/err.h
  #include linux/slab.h
  #include linux/smp.h
 +#include linux/cpu.h
  #include linux/irq_work.h
  
  #include asm/paravirt.h
 @@ -487,8 +488,10 @@ static void __xen_send_IPI_mask(const struct cpumask 
 *mask,
  {
   unsigned cpu;
  
 + get_online_cpus_atomic();
   for_each_cpu_and(cpu, mask, cpu_online_mask)
   xen_send_IPI_one(cpu, vector);
 + put_online_cpus_atomic();
  }
  
  static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
 @@ -551,8 +554,10 @@ void xen_send_IPI_all(int vector)
  {
   int xen_vector = xen_map_vector(vector);
  
 + get_online_cpus_atomic();
   if (xen_vector >= 0)
   __xen_send_IPI_mask(cpu_online_mask, xen_vector);
 + put_online_cpus_atomic();
  }
  
  void xen_send_IPI_self(int vector)
 @@ -572,20 +577,24 @@ void xen_send_IPI_mask_allbutself(const struct cpumask 
 *mask,
   if (!(num_online_cpus() > 1))
   return;
  
 + get_online_cpus_atomic();
   for_each_cpu_and(cpu, mask, cpu_online_mask) {
   if (this_cpu == cpu)
   continue;
  
   xen_smp_send_call_function_single_ipi(cpu);
   }
 + put_online_cpus_atomic();
  }
  
  void xen_send_IPI_allbutself(int vector)
  {
   int xen_vector = xen_map_vector(vector);
  
 + get_online_cpus_atomic();
   if (xen_vector >= 0)
   xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector);
 + put_online_cpus_atomic();
  }
  
  static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 


Re: [patch] hvc_xen: NULL dereference on allocation failure

2012-05-21 Thread Konrad Rzeszutek Wilk
On Tue, May 15, 2012 at 11:20:23AM +0100, Stefano Stabellini wrote:
 On Tue, 15 May 2012, Dan Carpenter wrote:
  If kzalloc() returns a NULL here, we pass a NULL to
  xencons_disconnect_backend() which will cause an Oops.
  
  Also I removed the __GFP_ZERO while I was at it since kzalloc() implies
  __GFP_ZERO.
  
  Signed-off-by: Dan Carpenter dan.carpen...@oracle.com
 
 Acked-by: Stefano Stabellini stefano.stabell...@eu.citrix.com

applied.
  


Re: kvm PCI assignment VFIO ramblings

2011-08-02 Thread Konrad Rzeszutek Wilk
On Tue, Aug 02, 2011 at 09:34:58AM -0600, Alex Williamson wrote:
 On Tue, 2011-08-02 at 22:58 +1000, Benjamin Herrenschmidt wrote:
  
  Don't worry, it took me a while to get my head around the HW :-) SR-IOV
  VFs will generally not have limitations like that no, but on the other
  hand, they -will- still require 1 VF = 1 group, ie, you won't be able to
  take a bunch of VFs and put them in the same 'domain'.
  
  I think the main deal is that VFIO/qemu sees domains as guests and
  tries to put all devices for a given guest into a domain.
 
 Actually, that's only a recent optimization, before that each device got
 it's own iommu domain.  It's actually completely configurable on the
 qemu command line which devices get their own iommu and which share.
 The default optimizes the number of domains (one) and thus the number of
 mapping callbacks since we pin the entire guest.
 
  On POWER, we have a different view of things were domains/groups are
  defined to be the smallest granularity we can (down to a single VF) and
  we give several groups to a guest (ie we avoid sharing the iommu in most
  cases)
  
  This is driven by the HW design but that design is itself driven by the
  idea that the domains/group are also error isolation groups and we don't
  want to take all of the IOs of a guest down if one adapter in that guest
  is having an error.
  
  The x86 domains are conceptually different as they are about sharing the
  iommu page tables with the clear long term intent of then sharing those
  page tables with the guest CPU own. We aren't going in that direction
  (at this point at least) on POWER..
 
 Yes and no.  The x86 domains are pretty flexible and used a few
 different ways.  On the host we do dynamic DMA with a domain per device,
 mapping only the inflight DMA ranges.  In order to achieve the
 transparent device assignment model, we have to flip that around and map
 the entire guest.  As noted, we can continue to use separate domains for
 this, but since each maps the entire guest, it doesn't add a lot of
 value and uses more resources and requires more mapping callbacks (and
 x86 doesn't have the best error containment anyway).  If we had a well
 supported IOMMU model that we could adapt for pvDMA, then it would make
 sense to keep each device in it's own domain again.  Thanks,

Could you have a PV IOMMU (in the guest) that would set up those
maps?


Re: [PATCH 7/7] [v2] drivers/misc: introduce Freescale hypervisor management driver

2011-06-06 Thread Konrad Rzeszutek Wilk
On Mon, Jun 06, 2011 at 05:01:36PM -0400, Chris Metcalf wrote:
 On 6/6/2011 12:24 PM, Arnd Bergmann wrote:
  On Monday 06 June 2011, Timur Tabi wrote:.
  And what about my concern that my driver will be the only one in 
  drivers/virt?
  I have no doubt that more of these will come. Chris Metcalf is currently
  looking for a home for his tilera hypervisor drivers, and we have the
  microsoft hyperv drivers in drivers/staging, so they will hopefully
  move to a proper place later. We also have similar drivers in other
  places, e.g. drivers/ps3/ps3-sys-manager.c, drivers/s390/char/vmcp.c
  or parts of drivers/xen.
 
 It might help if someone (Arnd?) wanted to propose a statement of what
 drivers/virt was really for.  If it's for any Linux driver that upcalls to

Was for? I am not seeing it in v3.0-rc2?

 a hypervisor for any reason, then the Tilera paravirtualized drivers fit in
 well.  If it's intended more for drivers that guests running under a
 hypervisor can use to talk to the hypervisor itself (e.g. managing

I believe that the code that deals with a specific subsystem (the block API,
for example) would reside in that subsystem's directory (so drivers/block
would have your virtualization block driver). This allows the block
maintainer to make sure your driver is OK.

 notifications that a hypervisor delivers to a guest to cause it to shut
 down or take other actions), then it doesn't seem like the Tilera

That looks to be an arch/x/tilera/virt/ candidate?

 paravirtualized device drivers belong there, since they're just using the
 Tilera hypervisor synchronously to do I/O or get/set device and driver state.

Well, I/O sounds like the block API or network API. But then you are also
doing management ioctls - which implies a driver. Does drivers/tilera not
work?

 
 -- 
 Chris Metcalf, Tilera Corp.
 http://www.tilera.com
 
 


Re: [PATCH v5 08/10] wii: add mem2 dma mapping ops

2010-03-19 Thread Konrad Rzeszutek Wilk
 +int wii_set_mem2_dma_constraints(struct device *dev)
 +{
 + struct dev_archdata *sd;
 +
 + sd = &dev->archdata;
 + sd->max_direct_dma_addr = 0;
 + sd->min_direct_dma_addr = wii_hole_start + wii_hole_size;
 +
 + set_dma_ops(dev, &wii_mem2_dma_ops);
 + return 0;
 +}
 +EXPORT_SYMBOL(wii_set_mem2_dma_constraints);

Can you make them EXPORT_SYMBOL_GPL?
 +
 +/**
 + * wii_clear_mem2_dma_constraints() - clears device MEM2 DMA constraints
 + * @dev: device for which DMA constraints are cleared
 + *
 + * Instructs device @dev to stop using MEM2 DMA buffers for DMA transfers.
 + * Must be called to undo wii_set_mem2_dma_constraints().
 + */
 +void wii_clear_mem2_dma_constraints(struct device *dev)
 +{
 + struct dev_archdata *sd;
 +
 + sd = &dev->archdata;
 + sd->max_direct_dma_addr = 0;
 + sd->min_direct_dma_addr = 0;
 +
 + set_dma_ops(dev, &dma_direct_ops);
 +}
 +EXPORT_SYMBOL(wii_clear_mem2_dma_constraints);

Ditto..
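
For illustration, a hypothetical caller of this set/clear pair (wii_foo is
made up, not from the patch) would use it from probe/remove:

#include <linux/platform_device.h>

static int wii_foo_probe(struct platform_device *pdev)
{
	/* route this device's DMA buffers through the MEM2 pool */
	int ret = wii_set_mem2_dma_constraints(&pdev->dev);

	if (ret)
		return ret;
	/* ... rest of probe ... */
	return 0;
}

static int wii_foo_remove(struct platform_device *pdev)
{
	/* undo the MEM2 constraints before the device goes away */
	wii_clear_mem2_dma_constraints(&pdev->dev);
	return 0;
}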
 +
 +/*
 + * swiotlb-based DMA ops for MEM2-only devices on the Wii.
 + *
 + */
 +
 +/*
 + * Allocate the SWIOTLB from MEM2.
 + */
 +void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
 +{
 + return __alloc_bootmem_low(size, PAGE_SIZE,
 +wii_hole_start + wii_hole_size);
 +}
 +
 +/*
 + * Bounce: copy the swiotlb buffer back to the original dma location
 + * This is a platform specific version replacing the generic __weak version.
 + */
 +void swiotlb_bounce(phys_addr_t phys, char *dma_buf, size_t size,
 + enum dma_data_direction dir)
 +{
 + void *vaddr = phys_to_virt(phys);
 +
 + if (dir == DMA_TO_DEVICE) {
 + memcpy(dma_buf, vaddr, size);
 + __dma_sync(dma_buf, size, dir);
 + } else {
 + __dma_sync(dma_buf, size, dir);
 + memcpy(vaddr, dma_buf, size);
 + }
 +}
 +
 +static dma_addr_t
 +mem2_virt_to_bus(struct device *dev, void *address)
 +{
 + return phys_to_dma(dev, virt_to_phys(address));
 +}
 +
 +static int
 +mem2_dma_mapping_error(struct device *dev, dma_addr_t dma_handle)
 +{
 + return dma_handle == mem2_virt_to_bus(dev, swiotlb_bk_overflow_buffer);
 +}
 +
 +static int
 +mem2_dma_supported(struct device *dev, u64 mask)
 +{
 + return mem2_virt_to_bus(dev, swiotlb_bk_end - 1) <= mask;
 +}
 +
 +/*
 + * Determines if a given DMA region specified by @dma_handle
 + * requires bouncing.
 + *
 + * Bouncing is required if the DMA region falls within MEM1.
 + */
 +static int mem2_needs_dmabounce(dma_addr_t dma_handle)
 +{
 + return dma_handle < wii_hole_start;
 +}
 +
 +/*
 + * Use the dma_direct_ops hooks for allocating and freeing coherent memory
 + * from the MEM2 DMA region.
 + */
 +
 +static void *mem2_alloc_coherent(struct device *dev, size_t size,
 +  dma_addr_t *dma_handle, gfp_t gfp)
 +{
 + void *vaddr;
 +
 + vaddr = dma_direct_ops.alloc_coherent(wii_mem2_dma_dev(), size,
 +   dma_handle, gfp);
 + if (vaddr && mem2_needs_dmabounce(*dma_handle)) {
 + dma_direct_ops.free_coherent(wii_mem2_dma_dev(), size, vaddr,
 +  *dma_handle);
 + dev_err(dev, "failed to allocate MEM2 coherent memory\n");
 + vaddr = NULL;
 + }
 + return vaddr;
 +}
 +
 +static void mem2_free_coherent(struct device *dev, size_t size,
 +void *vaddr, dma_addr_t dma_handle)
 +{
 + dma_direct_ops.free_coherent(wii_mem2_dma_dev(), size, vaddr,
 +  dma_handle);
 +}
 +
 +/*
 + * Maps (part of) a page so it can be safely accessed by a device.
 + *
 + * Calls the corresponding dma_direct_ops hook if the page region falls
 + * within MEM2.
 + * Otherwise, a bounce buffer allocated from MEM2 coherent memory is used.
 + */
 +static dma_addr_t
 +mem2_map_page(struct device *dev, struct page *page, unsigned long offset,
 +   size_t size, enum dma_data_direction dir,
 +   struct dma_attrs *attrs)
 +{
 + phys_addr_t phys = page_to_phys(page) + offset;
 + dma_addr_t dma_handle = phys_to_dma(dev, phys);
 + dma_addr_t swiotlb_start_dma;
 + void *map;
 +
 + BUG_ON(dir == DMA_NONE);
 +
 + if (dma_capable(dev, dma_handle, size) && !swiotlb_force) {
 + return dma_direct_ops.map_page(dev, page, offset, size,
 +dir, attrs);
 + }
 +
 + swiotlb_start_dma = mem2_virt_to_bus(dev, swiotlb_bk_start);
 + map = swiotlb_bk_map_single(dev, phys, swiotlb_start_dma, size, dir);
 + if (!map) {
 + swiotlb_full(dev, size, dir, 1);
 + map = swiotlb_bk_overflow_buffer;
 + }
 +
 + dma_handle = mem2_virt_to_bus(dev, map);
 + BUG_ON(!dma_capable(dev, dma_handle, size));
 +
 + return dma_handle;
 +}
 +
 +/*
 + * Unmaps (part of) a page previously mapped.
 + *
 + * Calls the corresponding dma_direct_ops hook if the DMA region 

Re: [PATCH v5 08/10] wii: add mem2 dma mapping ops

2010-03-19 Thread Konrad Rzeszutek Wilk
 +/*
 + * The mem2_dma device.
 + *
 + * This device owns a pool of coherent MEM2 memory that can be shared among
 + * several devices requiring MEM2 DMA buffers, instead of dedicating specific
 + * pools for each device.
 + *
 + * A device can use the shared coherent MEM2 memory pool by calling
 + * wii_set_mem2_dma_constraints().
 + *
 + */
 +
 +struct mem2_dma {
 + struct platform_device *pdev;
 +

That blank line isn't necessary.

 + dma_addr_t dma_base;

I think you need only one of them. You don't seem to use 'base'.
 + void *base;
 + size_t size;
 +};


Re: [LKML] [RFC PATCH v3 04/11] swiotlb: support NOT_COHERENT_CACHE PowerPC platforms

2010-03-09 Thread Konrad Rzeszutek Wilk
On Sun, Mar 07, 2010 at 01:11:45PM +0100, Albert Herranz wrote:
 The current SWIOTLB code does not support NOT_COHERENT_CACHE platforms.
 This patch adds support for NOT_COHERENT_CACHE platforms to SWIOTLB by
 adding two platform specific functions swiotlb_dma_sync_page() and
 swiotlb_dma_sync() which can be used to explicitly manage cache coherency.

Hey Albert,

I've been doing some posting in this area to split the physical / bus
address translation so that multiple platforms can utilize it. I was
wondering if it makes sense to utilize some of those concepts (ie, extend it
for DMA coherency) for your code:

https://lists.linux-foundation.org/pipermail/iommu/2010-February/002066.html

And here is the git tree that goes on top of those patches:
git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb-2.6.git 
xen-swiotlb-0.5


 
 On PowerPC these functions are mapped to their corresponding
 __dma_sync_page() and __dma_sync() functions.
 On other architectures using SWIOTLB these functions are optimized out.
 
 This will be used later to support SWIOTLB on the Nintendo Wii video game
 console.
 
 Signed-off-by: Albert Herranz albert_herr...@yahoo.es
 CC: linuxppc-dev@lists.ozlabs.org
 CC: linux-ker...@vger.kernel.org
 CC: x...@kernel.org
 CC: linux-i...@vger.kernel.org
 ---
  arch/ia64/include/asm/swiotlb.h|   10 ++
  arch/powerpc/include/asm/swiotlb.h |3 +++
  arch/x86/include/asm/swiotlb.h |   10 ++
  lib/swiotlb.c  |   30 --
  4 files changed, 47 insertions(+), 6 deletions(-)
 
 diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h
 index f0acde6..6722090 100644
 --- a/arch/ia64/include/asm/swiotlb.h
 +++ b/arch/ia64/include/asm/swiotlb.h
 @@ -14,4 +14,14 @@ static inline void pci_swiotlb_init(void)
  }
  #endif
  
 +static inline void swiotlb_dma_sync_page(struct page *page,
 +  unsigned long offset,
 +  size_t size, int direction)
 +{
 +}
 +
 +static inline void swiotlb_dma_sync(void *vaddr, size_t size, int direction)
 +{
 +}
 +
  #endif /* ASM_IA64__SWIOTLB_H */
 diff --git a/arch/powerpc/include/asm/swiotlb.h 
 b/arch/powerpc/include/asm/swiotlb.h
 index 8979d4c..603b343 100644
 --- a/arch/powerpc/include/asm/swiotlb.h
 +++ b/arch/powerpc/include/asm/swiotlb.h
 @@ -22,4 +22,7 @@ int __init swiotlb_setup_bus_notifier(void);
  
  extern void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev);
  
 +#define swiotlb_dma_sync_page __dma_sync_page
 +#define swiotlb_dma_sync __dma_sync
 +
  #endif /* __ASM_SWIOTLB_H */
 diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
 index 8085277..e5f6d9c 100644
 --- a/arch/x86/include/asm/swiotlb.h
 +++ b/arch/x86/include/asm/swiotlb.h
 @@ -20,4 +20,14 @@ static inline void pci_swiotlb_init(void)
  
  static inline void dma_mark_clean(void *addr, size_t size) {}
  
 +static inline void swiotlb_dma_sync_page(struct page *page,
 +  unsigned long offset,
 +  size_t size, int direction)
 +{
 +}
 +
 +static inline void swiotlb_dma_sync(void *vaddr, size_t size, int direction)
 +{
 +}
 +
  #endif /* _ASM_X86_SWIOTLB_H */
 diff --git a/lib/swiotlb.c b/lib/swiotlb.c
 index 94db5df..8f2dad9 100644
 --- a/lib/swiotlb.c
 +++ b/lib/swiotlb.c
 @@ -346,10 +346,13 @@ static void swiotlb_bounce(phys_addr_t phys, char 
 *dma_addr, size_t size,
   local_irq_save(flags);
   buffer = kmap_atomic(pfn_to_page(pfn),
KM_BOUNCE_READ);
 - if (dir == DMA_TO_DEVICE)
 + if (dir == DMA_TO_DEVICE) {
   memcpy(dma_addr, buffer + offset, sz);
 - else
 + swiotlb_dma_sync(dma_addr, sz, dir);
 + } else {
 + swiotlb_dma_sync(dma_addr, sz, dir);
   memcpy(buffer + offset, dma_addr, sz);
 + }
   kunmap_atomic(buffer, KM_BOUNCE_READ);
   local_irq_restore(flags);
  
 @@ -359,10 +362,14 @@ static void swiotlb_bounce(phys_addr_t phys, char 
 *dma_addr, size_t size,
   offset = 0;
   }
   } else {
 - if (dir == DMA_TO_DEVICE)
 + if (dir == DMA_TO_DEVICE) {
   memcpy(dma_addr, phys_to_virt(phys), size);
 - else
 + swiotlb_dma_sync(dma_addr, size, dir);
 +
 + } else {
 + swiotlb_dma_sync(dma_addr, size, dir);
   memcpy(phys_to_virt(phys), dma_addr, size);
 + }
   }
  }
  
 @@ -542,6 +549,8 @@ sync_single(struct device *hwdev, char *dma_addr, size_t 
 size,
   }
  }
  
 +#ifndef CONFIG_NOT_COHERENT_CACHE
 +
  void *
  

Re: [LKML] [RFC PATCH v3 05/11] swiotlb: add swiotlb_set_default_size()

2010-03-09 Thread Konrad Rzeszutek Wilk
On Sun, Mar 07, 2010 at 01:11:46PM +0100, Albert Herranz wrote:
 The current SWIOTLB code uses a default of 64MB for the IO TLB area.
 This size can be influenced using a kernel command line parameter swiotlb.
 Unfortunately, the parsing of the kernel command line is done _after_ the
 swiotlb is initialized on some architectures.

Why can't it be moved up? I mean move the parsing of the kernel
parameters before the PCI subsystem?
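
(A hypothetical sketch of that alternative, assuming the ordering problem
is __setup() being parsed too late: register the parameter with
early_param() instead, which parse_early_param() handles well before most
init code. isdigit() comes from linux/ctype.h.)

/* hypothetical: parse swiotlb= early instead of via __setup() */
static int __init setup_io_tlb_npages_early(char *str)
{
	if (isdigit(*str))
		io_tlb_nslabs = simple_strtoul(str, &str, 0);
	return 0;
}
early_param("swiotlb", setup_io_tlb_npages_early);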
 
 This patch adds a new function swiotlb_set_default_size() which can be used
 before swiotlb_init() to indicate the desired IO TLB area size in bytes.
 
 This will be used later to implement a smaller IO TLB on the Nintendo Wii
 video game console which just comes with 24MB + 64MB of RAM.

Use io_tlb_nslabs, which is what swiotlb_init_with_default_size uses
(the passed-in argument is only used if io_tlb_nslabs is not set).
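
That is, swiotlb_init_with_default_size() already does roughly this
(simplified from lib/swiotlb.c of this era):

	if (!io_tlb_nslabs) {
		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
	}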


 
 Signed-off-by: Albert Herranz albert_herr...@yahoo.es
 CC: linuxppc-dev@lists.ozlabs.org
 CC: linux-ker...@vger.kernel.org
 CC: x...@kernel.org
 CC: linux-i...@vger.kernel.org
 ---
  include/linux/swiotlb.h |2 ++
  lib/swiotlb.c   |   27 ++-
  2 files changed, 28 insertions(+), 1 deletions(-)
 
 diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
 index 3954228..2af6a45 100644
 --- a/include/linux/swiotlb.h
 +++ b/include/linux/swiotlb.h
 @@ -22,6 +22,8 @@ extern int swiotlb_force;
   */
  #define IO_TLB_SHIFT 11
  
 +extern size_t __init swiotlb_set_default_size(size_t size);
 +
  extern void swiotlb_init(int verbose);
  
  extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs);
 diff --git a/lib/swiotlb.c b/lib/swiotlb.c
 index 8f2dad9..c99512d 100644
 --- a/lib/swiotlb.c
 +++ b/lib/swiotlb.c
 @@ -73,6 +73,11 @@ static char *io_tlb_start, *io_tlb_end;
  static unsigned long io_tlb_nslabs;
  
  /*
 + * Default size for the IO TLB (64MB).
 + */
 +static __initdata size_t io_tlb_default_size = 64 * (1 << 20);
 +
 +/*
   * When the IOMMU overflows we return a fallback buffer. This sets the size.
   */
  static unsigned long io_tlb_overflow = 32*1024;
 @@ -117,6 +122,26 @@ setup_io_tlb_npages(char *str)
  __setup("swiotlb=", setup_io_tlb_npages);
  /* make io_tlb_overflow tunable too? */
  
 +/**
 + * swiotlb_set_default_size() - set the default size for the IO TLB
 + * @size: size in bytes of the IO TLB
 + *
 + * A platform can use this function to change the default size of the
 + * IO TLB when the default of 64MB is not suitable.
 + * This function must be called before swiotlb_init().
 + *
 + * Note that on some platforms this is the only way to influence the
 + * size of the IO TLB, as the command line may be parsed _after_ the
 + * IO TLB is initialized.
 + */
 +size_t __init swiotlb_set_default_size(size_t size)
 +{
 + size_t previous_size = io_tlb_default_size;
 +
 + io_tlb_default_size = size;
 + return previous_size;
 +}
 +
  void * __weak __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
  {
   return alloc_bootmem_low_pages(size);
 @@ -193,7 +218,7 @@ swiotlb_init_with_default_size(size_t default_size, int 
 verbose)
  void __init
  swiotlb_init(int verbose)
  {
 - swiotlb_init_with_default_size(64 * (1 << 20), verbose); /* default to 64MB */
 + swiotlb_init_with_default_size(io_tlb_default_size, verbose);
  }
  
  /*
 -- 
 1.6.3.3
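
For what it's worth, with this patch applied a platform would use the new
hook like so (hypothetical wii_swiotlb_setup(), not from the series):

void __init wii_swiotlb_setup(void)
{
	/* 1MB IO TLB instead of the 64MB default; must run before swiotlb_init() */
	swiotlb_set_default_size(1 << 20);
	swiotlb_init(1);
}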
 