Re: [PATCH 7/7] swiotlb: don't override the command line in swiotlb_adjust_size

2021-04-22 Thread Tom Lendacky
On 4/22/21 2:19 AM, Christoph Hellwig wrote:
> When the user specified an explicit swiotlb size on the command line,
> the achitecture code should not override it.
> 
> Fixes: 2cbc2776efe4 ("swiotlb: remove swiotlb_nr_tbl")
> Reported-by: Tom Lendacky 
> Signed-off-by: Christoph Hellwig 

I tested this patchset and I'm not able to get the override via the
command line or via the SEV adjustment function. Looking at the code,
swiotlb_default_size is initialized, so the call to swiotlb_adjust_size()
always returns without setting the new size.

Thanks,
Tom

> ---
>  kernel/dma/swiotlb.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 87d06ddf4753f3..aef02a3825b494 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -106,7 +106,9 @@ void swiotlb_set_max_segment(unsigned int val)
>  
>  unsigned long swiotlb_size_or_default(void)
>  {
> - return swiotlb_default_size;
> + if (swiotlb_default_size)
> + return swiotlb_default_size;
> + return IO_TLB_DEFAULT_SIZE;
>  }
>  
>  void __init swiotlb_adjust_size(unsigned long size)
> @@ -116,6 +118,8 @@ void __init swiotlb_adjust_size(unsigned long size)
>* architectures such as those supporting memory encryption to
>* adjust/expand SWIOTLB size for their use.
>*/
> + if (swiotlb_default_size)
> + return;
>   swiotlb_default_size = ALIGN(size, IO_TLB_SEGSIZE << IO_TLB_SHIFT);
>   pr_info("SWIOTLB bounce buffer size adjusted to %luMB",
>   swiotlb_default_size >> 20);
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v5 16/16] of: Add plumbing for restricted DMA pool

2021-04-22 Thread Claire Chang
On Thu, Apr 22, 2021 at 4:17 PM Claire Chang  wrote:
>
> If a device is not behind an IOMMU, we look up the device node and set
> up the restricted DMA when the restricted-dma-pool is presented.
>
> Signed-off-by: Claire Chang 
> ---
>  drivers/of/address.c| 25 +
>  drivers/of/device.c |  3 +++
>  drivers/of/of_private.h |  5 +
>  3 files changed, 33 insertions(+)
>
> diff --git a/drivers/of/address.c b/drivers/of/address.c
> index 54f221dde267..fff3adfe4986 100644
> --- a/drivers/of/address.c
> +++ b/drivers/of/address.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -1109,6 +1110,30 @@ bool of_dma_is_coherent(struct device_node *np)
>  }
>  EXPORT_SYMBOL_GPL(of_dma_is_coherent);
>
> +int of_dma_set_restricted_buffer(struct device *dev)
> +{
> +   struct device_node *node;
> +   int count, i;
> +
> +   if (!dev->of_node)
> +   return 0;
> +
> +   count = of_property_count_elems_of_size(dev->of_node, "memory-region",
> +   sizeof(phandle));
> +   for (i = 0; i < count; i++) {
> +   node = of_parse_phandle(dev->of_node, "memory-region", i);
> +   /* There might be multiple memory regions, but only one
> +* restriced-dma-pool region is allowed.
> +*/
> +   if (of_device_is_compatible(node, "restricted-dma-pool") &&
> +   of_device_is_available(node))
> +   return of_reserved_mem_device_init_by_idx(
> +   dev, dev->of_node, i);
> +   }
> +
> +   return 0;
> +}
> +
>  /**
>   * of_mmio_is_nonposted - Check if device uses non-posted MMIO
>   * @np:device node
> diff --git a/drivers/of/device.c b/drivers/of/device.c
> index c5a9473a5fb1..d8d865223e51 100644
> --- a/drivers/of/device.c
> +++ b/drivers/of/device.c
> @@ -165,6 +165,9 @@ int of_dma_configure_id(struct device *dev, struct 
> device_node *np,
>
> arch_setup_dma_ops(dev, dma_start, size, iommu, coherent);
>
> +   if (!iommu)
> +   return of_dma_set_restricted_buffer(dev);
> +
> return 0;
>  }
>  EXPORT_SYMBOL_GPL(of_dma_configure_id);
> diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
> index d717efbd637d..e9237f5eff48 100644
> --- a/drivers/of/of_private.h
> +++ b/drivers/of/of_private.h
> @@ -163,12 +163,17 @@ struct bus_dma_region;
>  #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA)
>  int of_dma_get_range(struct device_node *np,
> const struct bus_dma_region **map);
> +int of_dma_set_restricted_buffer(struct device *dev);
>  #else
>  static inline int of_dma_get_range(struct device_node *np,
> const struct bus_dma_region **map)
>  {
> return -ENODEV;
>  }
> +static inline int of_dma_get_restricted_buffer(struct device *dev)

This one should be of_dma_set_restricted_buffer. Sorry for the typo.

> +{
> +   return -ENODEV;
> +}
>  #endif
>
>  #endif /* _LINUX_OF_PRIVATE_H */
> --
> 2.31.1.368.gbe11c130af-goog
>
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH V4 05/18] iommu/ioasid: Redefine IOASID set and allocation APIs

2021-04-22 Thread Jason Gunthorpe
On Thu, Apr 22, 2021 at 04:38:08PM -0600, Alex Williamson wrote:

> Because it's fundamental to the isolation of the device?  What you're
> proposing doesn't get around the group issue, it just makes it implicit
> rather than explicit in the uapi.

I'm not even sure it makes it explicit or implicit, it just takes away
the FD.

There are four group IOCTLs, I see them mapping to /dev/ioasid follows:
 VFIO_GROUP_GET_STATUS - 
   + VFIO_GROUP_FLAGS_CONTAINER_SET is fairly redundant
   + VFIO_GROUP_FLAGS_VIABLE could be in a new sysfs under
 kernel/iomm_groups, or could be an IOCTL on /dev/ioasid
   IOASID_ALL_DEVICES_VIABLE

 VFIO_GROUP_SET_CONTAINER -
   + This happens implicitly when the device joins the IOASID
 so it gets moved to the vfio_device FD:
  ioctl(vifo_device_fd, JOIN_IOASID_FD, ioasifd)

 VFIO_GROUP_UNSET_CONTAINER -
   + Also moved to the vfio_device FD, opposite of JOIN_IOASID_FD

 VFIO_GROUP_GET_DEVICE_FD -
   + Replaced by opening /dev/vfio/deviceX
 Learn the deviceX which will be the cdev sysfs shows as:
  /sys/devices/pci:00/:00:01.0/:01:00.0/vfio/deviceX/dev
Open /dev/vfio/deviceX

> > How do we model the VFIO group security concept to something like
> > VDPA?
> 
> Is it really a "VFIO group security concept"?  We're reflecting the
> reality of the hardware, not all devices are fully isolated.  

Well, exactly.

/dev/ioasid should understand the group concept somehow, otherwise it
is incomplete and maybe even security broken.

So, how do I add groups to, say, VDPA in a way that makes sense? The
only answer I come to is broadly what I outlined here - make
/dev/ioasid do all the group operations, and do them when we enjoin
the VDPA device to the ioasid.

Once I have solved all the groups problems with the non-VFIO users,
then where does that leave VFIO? Why does VFIO need a group FD if
everyone else doesn't?

> IOMMU group.  This is the reality that any userspace driver needs to
> play in, it doesn't magically go away because we drop the group file
> descriptor.  

I'm not saying it does, I'm saying it makes the uAPI more regular and
easier to fit into /dev/ioasid without the group FD.

> It only makes the uapi more difficult to use correctly because
> userspace drivers need to go outside of the uapi to have any idea
> that this restriction exists.  

I don't think it makes any substantive difference one way or the
other.

With the group FD: the userspace has to read sysfs, find the list of
devices in the group, open the group fd, create device FDs for each
device using the name from sysfs.

Starting from a BDF the general pseudo code is
 group_path = readlink("/sys/bus/pci/devices/BDF/iommu_group")
 group_name = basename(group_path)
 group_fd = open("/dev/vfio/"+group_name)
 device_fd = ioctl(VFIO_GROUP_GET_DEVICE_FD, BDF);

Without the group FD: the userspace has to read sysfs, find the list
of devices in the group and then open the device-specific cdev (found
via sysfs) and link them to a /dev/ioasid FD.

Starting from a BDF the general pseudo code is:
 device_name = first_directory_of("/sys/bus/pci/devices/BDF/vfio/")
 device_fd = open("/dev/vfio/"+device_name)
 ioasidfd = open("/dev/ioasid")
 ioctl(device_fd, JOIN_IOASID_FD, ioasidfd)

These two routes can have identical outcomes and identical security
checks.

In both cases if userspace wants a list of BDFs in the same group as
the BDF it is interested in:
   readdir("/sys/bus/pci/devices/BDF/iommu_group/devices")

It seems like a very small difference to me.

I still don't see how the group restriction gets surfaced to the
application through the group FD. The applications I looked through
just treat the group FD as a step on their way to get the device_fd.

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH V4 05/18] iommu/ioasid: Redefine IOASID set and allocation APIs

2021-04-22 Thread Alex Williamson
On Thu, 22 Apr 2021 17:00:24 -0300
Jason Gunthorpe  wrote:

> On Thu, Apr 22, 2021 at 01:37:47PM -0600, Alex Williamson wrote:
> 
> > If by "classic" you mean conventional PCI bus, then this is much worse
> > than simply "at risk".  The IOMMU cannot differentiate devices behind a
> > PCIe-to-PCI bridge, so the moment you turn on the IOMMU context for the
> > NIC, the address space for your HBA is pulled out from under it.
> 
> Yes, I understand this, but this is fine and not really surprising if
> the HD device is just forced to remain "unusued"
> 
> To my mind the bigger issue is the NIC now has access to the HD and
> nobody really raised an alarm unless the HD happened to have a kernel
> driver bound.
> 
> > the vfio world, the NIC and HBA are grouped and managed together, the
> > user cannot change the IOMMU context of a group unless all of the
> > devices in the group are "viable", ie. they are released from any host
> > drivers.  
> 
> Yes, I don't propose to change any of that, I just suggest to make the
> 'change the IOMMU context" into "join a /dev/ioasid fd"
> 
> All devices in the group have to be joined to the same ioasid or, with
> the flag, left "unused" with no kernel driver. 
> 
> This is the same viability test VFIO is doing now, just moved slightly
> in the programming flow.
> 
> > vfio users are extremely aware of grouping, they understand the model,
> > if not always the reason for the grouping.  You only need to look at
> > r/VFIO to find various lsgroup scripts and kernel patches to manipulate
> > grouping.  The visibility to the user is valuable imo.  
> 
> I don't propose to remove visibility, sysfs and the lsgroup scripts
> should all still be there.
> 
> I'm just acknowledging reality that the user command line experiance
> we have is focused on single BDFs not on groups. The user only sees
> the group idea when things explode, so why do we have it as such an
> integral part of the programming model?

Because it's fundamental to the isolation of the device?  What you're
proposing doesn't get around the group issue, it just makes it implicit
rather than explicit in the uapi.  For what?  Some ideal notion that
every device should be isolated at the expense of userspace drivers
that then fail randomly because they didn't take into account groups
because it's not part of the uapi?

> > >ioctl(vifo_device_fd, JOIN_IOASID_FD, ioasifd)
> > >[..]
> > >ioctl(vfio_device, ATTACH_IOASID, gpa_ioasid_id) == ENOPERM
> > > 
> > > I would feel comfortable if the ATTACH_IOASID fails by default if all
> > > devices in the group have not been joined to the same ioasidfd.  
> > 
> > And without a group representation to userspace, how would a user know
> > to resolve that?  
> 
> Userspace can continue to read sysfs files that show the group
> relation.
> 
> I'm only talking about the group char device and FD.
> 
> > So the group still exist in sysfs, they just don't have vfio
> > representations?  An implicit grouping does what, automatically unbind
> > the devices, so an admin gives a user access to the NIC but their HBA
> > device disappears because they were implicitly linked?
> 
> It does exactly the same thing as opening the VFIO group FD and
> instantiating a single device FD does today.
> 
> Most software, like dpdk, automatically deduces the VFIO group from
> the commandline BDF, I'm mainly suggesting we move that deduction from
> userspace software to kernel software.
> 
> > basis ownership on the group, if a user owns the group but the group is
> > not viable because a device is still bound to another kernel driver,
> > the use can't do anything.  Implicitly snarfing up subtly affected
> > devices is bad.  
> 
> The user would get an /dev/ioasid join failure just like they get a
> failure from VFIO_GROUP_SET_CONTAINER (?) today that reflects the
> group is not viable.
> 
> Otherwise what is the alternative?
> 
> How do we model the VFIO group security concept to something like
> VDPA?

Is it really a "VFIO group security concept"?  We're reflecting the
reality of the hardware, not all devices are fully isolated.  An IOMMU
group is the smallest set of devices we believe are isolated from all
other sets of devices.  VFIO groups simply reflect that notion of an
IOMMU group.  This is the reality that any userspace driver needs to
play in, it doesn't magically go away because we drop the group file
descriptor.  It only makes the uapi more difficult to use correctly
because userspace drivers need to go outside of the uapi to have any
idea that this restriction exists.  Thanks,

Alex

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH V4 05/18] iommu/ioasid: Redefine IOASID set and allocation APIs

2021-04-22 Thread Jason Gunthorpe
On Thu, Apr 22, 2021 at 01:37:47PM -0600, Alex Williamson wrote:

> If by "classic" you mean conventional PCI bus, then this is much worse
> than simply "at risk".  The IOMMU cannot differentiate devices behind a
> PCIe-to-PCI bridge, so the moment you turn on the IOMMU context for the
> NIC, the address space for your HBA is pulled out from under it.  

Yes, I understand this, but this is fine and not really surprising if
the HD device is just forced to remain "unusued"

To my mind the bigger issue is the NIC now has access to the HD and
nobody really raised an alarm unless the HD happened to have a kernel
driver bound.

> the vfio world, the NIC and HBA are grouped and managed together, the
> user cannot change the IOMMU context of a group unless all of the
> devices in the group are "viable", ie. they are released from any host
> drivers.

Yes, I don't propose to change any of that, I just suggest to make the
'change the IOMMU context" into "join a /dev/ioasid fd"

All devices in the group have to be joined to the same ioasid or, with
the flag, left "unused" with no kernel driver. 

This is the same viability test VFIO is doing now, just moved slightly
in the programming flow.

> vfio users are extremely aware of grouping, they understand the model,
> if not always the reason for the grouping.  You only need to look at
> r/VFIO to find various lsgroup scripts and kernel patches to manipulate
> grouping.  The visibility to the user is valuable imo.

I don't propose to remove visibility, sysfs and the lsgroup scripts
should all still be there.

I'm just acknowledging reality that the user command line experiance
we have is focused on single BDFs not on groups. The user only sees
the group idea when things explode, so why do we have it as such an
integral part of the programming model?

> >ioctl(vifo_device_fd, JOIN_IOASID_FD, ioasifd)
> >[..]
> >ioctl(vfio_device, ATTACH_IOASID, gpa_ioasid_id) == ENOPERM
> > 
> > I would feel comfortable if the ATTACH_IOASID fails by default if all
> > devices in the group have not been joined to the same ioasidfd.
> 
> And without a group representation to userspace, how would a user know
> to resolve that?

Userspace can continue to read sysfs files that show the group
relation.

I'm only talking about the group char device and FD.

> So the group still exist in sysfs, they just don't have vfio
> representations?  An implicit grouping does what, automatically unbind
> the devices, so an admin gives a user access to the NIC but their HBA
> device disappears because they were implicitly linked?  

It does exactly the same thing as opening the VFIO group FD and
instantiating a single device FD does today.

Most software, like dpdk, automatically deduces the VFIO group from
the commandline BDF, I'm mainly suggesting we move that deduction from
userspace software to kernel software.

> basis ownership on the group, if a user owns the group but the group is
> not viable because a device is still bound to another kernel driver,
> the use can't do anything.  Implicitly snarfing up subtly affected
> devices is bad.

The user would get an /dev/ioasid join failure just like they get a
failure from VFIO_GROUP_SET_CONTAINER (?) today that reflects the
group is not viable.

Otherwise what is the alternative?

How do we model the VFIO group security concept to something like
VDPA?

How do you reconcile the ioasid security model with the VFIO container
and group FDs?

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH V4 05/18] iommu/ioasid: Redefine IOASID set and allocation APIs

2021-04-22 Thread Alex Williamson
On Thu, 22 Apr 2021 14:57:15 -0300
Jason Gunthorpe  wrote:
> > > The security rule for isolation is that once a device is attached to a
> > > /dev/ioasid fd then all other devices in that security group must be
> > > attached to the same ioasid FD or left unused.  
> > 
> > Sounds like a group...  Note also that if those other devices are not
> > isolated from the user's device, the user could manipulate "unused"
> > devices via DMA.  So even unused devices should be within the same
> > IOMMU context... thus attaching groups to IOMMU domains.  
> 
> That is a very interesting point. So, say, in the classic PCI bus
> world if I have a NIC and HD on my PCI bus and both are in the group,
> I assign the NIC to a /dev/ioasid & VFIO then it is possible to use
> the NIC to access the HD via DMA
> 
> And here you want a more explicit statement that the HD is at risk by
> using the NIC?

If by "classic" you mean conventional PCI bus, then this is much worse
than simply "at risk".  The IOMMU cannot differentiate devices behind a
PCIe-to-PCI bridge, so the moment you turn on the IOMMU context for the
NIC, the address space for your HBA is pulled out from under it.  In
the vfio world, the NIC and HBA are grouped and managed together, the
user cannot change the IOMMU context of a group unless all of the
devices in the group are "viable", ie. they are released from any host
drivers.

> Honestly, I'm not sure the current group FD is actually showing that
> very strongly - though I get the point it is modeled in the sysfs and
> kind of implicit in the API - we evolved things in a way where most
> actual applications are taking in a PCI BDF from the user, not a group
> reference. So the actual security impact seems lost on the user.

vfio users are extremely aware of grouping, they understand the model,
if not always the reason for the grouping.  You only need to look at
r/VFIO to find various lsgroup scripts and kernel patches to manipulate
grouping.  The visibility to the user is valuable imo.

> Along my sketch if we have:
> 
>ioctl(vifo_device_fd, JOIN_IOASID_FD, ioasifd)
>[..]
>ioctl(vfio_device, ATTACH_IOASID, gpa_ioasid_id) == ENOPERM
> 
> I would feel comfortable if the ATTACH_IOASID fails by default if all
> devices in the group have not been joined to the same ioasidfd.

And without a group representation to userspace, how would a user know
to resolve that?

> So in the NIC&HD example the application would need to do:
> 
>ioasid_fd = open("/dev/ioasid");
>nic_device_fd = open("/dev/vfio/device0")
>hd_device_fd = open("/dev/vfio/device1")
>
>ioctl(nic_device_fd, JOIN_IOASID_FD, ioasifd)
>ioctl(hd_device_fd, JOIN_IOASID_FD, ioasifd)
>[..]
>ioctl(nice_device, ATTACH_IOASID, gpa_ioasid_id) == SUCCESS
> 
> Now the security relation is forced by the kernel to be very explicit.

But not discoverable to the user.

> However to keep current semantics, I'd suggest a flag on
> JOIN_IOASID_FD called "IOASID_IMPLICIT_GROUP" which has the effect of
> allowing the ATTACH_IOASID to succeed without the user having to
> explicitly join all the group devices. This is analogous to the world
> we have today of opening the VFIO group FD but only instantiating one
> device FD.
> 
> In effect the ioasid FD becomes the group and the numbered IOASID's
> inside the FD become the /dev/vfio/vfio objects - we don't end up with
> fewer objects in the system, they just have different uAPI
> presentations.
> 
> I'd envision applications like DPDK that are BDF centric to use the
> first API with some '--allow-insecure-vfio' flag to switch on the
> IOASID_IMPLICIT_GROUP. Maybe good applications would also print:
>   "Danger Will Robinson these PCI BDFs [...] are also at risk"
> When the switch is used by parsing the sysfs

So the group still exist in sysfs, they just don't have vfio
representations?  An implicit grouping does what, automatically unbind
the devices, so an admin gives a user access to the NIC but their HBA
device disappears because they were implicitly linked?  That's why vfio
basis ownership on the group, if a user owns the group but the group is
not viable because a device is still bound to another kernel driver,
the use can't do anything.  Implicitly snarfing up subtly affected
devices is bad.

> > > Thus /dev/ioasid also becomes the unit of security and the IOMMU
> > > subsystem level becomes aware of and enforces the group security
> > > rules. Userspace does not need to "see" the group  
> > 
> > What tools does userspace have to understand isolation of individual
> > devices without groups?  
> 
> I think we can continue to show all of this group information in sysfs
> files, it just doesn't require application code to open a group FD.
> 
> This becomes relavent the more I think about it - elmininating the
> group and container FD uAPI by directly creating the device FD also
> sidesteps questions about how to model these objects in a /dev/ioasid
> only world. We simply don't have th

RE: [PATCH] iommu/amd: Add amd_iommu=force_enable option

2021-04-22 Thread Deucher, Alexander
[AMD Public Use]

> -Original Message-
> From: Joerg Roedel 
> Sent: Thursday, April 22, 2021 9:07 AM
> To: iommu@lists.linux-foundation.org
> Cc: Joerg Roedel ; Will Deacon ;
> Deucher, Alexander ;
> d1nu...@protonmail.com; linux-...@vger.kernel.org; linux-
> ker...@vger.kernel.org; Joerg Roedel 
> Subject: [PATCH] iommu/amd: Add amd_iommu=force_enable option
> 
> From: Joerg Roedel 
> 
> Add this option to enable the IOMMU on platforms like AMD Stoney, where
> the kernel usually disables it because it may cause problems in some
> scenarios.
> 
> Signed-off-by: Joerg Roedel 

Acked-by: Alex Deucher 

> ---
>  Documentation/admin-guide/kernel-parameters.txt | 3 +++
>  drivers/iommu/amd/init.c| 7 +++
>  2 files changed, 10 insertions(+)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt
> b/Documentation/admin-guide/kernel-parameters.txt
> index 04545725f187..c9573f726707 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -303,6 +303,9 @@
> allowed anymore to lift isolation
> requirements as needed. This
> option
> does not override iommu=pt
> + force_enable - Force enable the IOMMU on
> platforms known
> +to be buggy with IOMMU enabled. Use
> this
> +option with care.
> 
>   amd_iommu_dump= [HW,X86-64]
>   Enable AMD IOMMU driver option to dump the ACPI
> table diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
> index 321f5906e6ed..3e2295d3b3e2 100644
> --- a/drivers/iommu/amd/init.c
> +++ b/drivers/iommu/amd/init.c
> @@ -155,6 +155,7 @@ static int amd_iommu_xt_mode =
> IRQ_REMAP_XAPIC_MODE;
> 
>  static bool amd_iommu_detected;
>  static bool __initdata amd_iommu_disabled;
> +static bool __initdata amd_iommu_force_enable;
>  static int amd_iommu_target_ivhd_type;
> 
>  u16 amd_iommu_last_bdf;  /* largest PCI device id we
> have
> @@ -2882,6 +2883,9 @@ static bool detect_ivrs(void)
> 
>   acpi_put_table(ivrs_base);
> 
> + if (amd_iommu_force_enable)
> + goto out;
> +
>   /* Don't use IOMMU if there is Stoney Ridge graphics */
>   for (i = 0; i < 32; i++) {
>   u32 pci_id;
> @@ -2893,6 +2897,7 @@ static bool detect_ivrs(void)
>   }
>   }
> 
> +out:
>   /* Make sure ACS will be enabled during PCI probe */
>   pci_request_acs();
> 
> @@ -3148,6 +3153,8 @@ static int __init parse_amd_iommu_options(char
> *str)
>   for (; *str; ++str) {
>   if (strncmp(str, "fullflush", 9) == 0)
>   amd_iommu_unmap_flush = true;
> + if (strncmp(str, "force_enable", 12) == 0)
> + amd_iommu_force_enable = true;
>   if (strncmp(str, "off", 3) == 0)
>   amd_iommu_disabled = true;
>   if (strncmp(str, "force_isolation", 15) == 0)
> --
> 2.31.1
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH V4 05/18] iommu/ioasid: Redefine IOASID set and allocation APIs

2021-04-22 Thread Jason Gunthorpe
On Thu, Apr 22, 2021 at 11:13:37AM -0600, Alex Williamson wrote:
> I'm suggesting that if we're replacing the container/group model with
> an ioasid then we're effectively creating a new thing that really only
> retains the vfio device uapi.

Yes, I think that is a fair assessment, but not necessarily bad.

The VFIO device uAPI is really the thing that is unique to VFIO and
cannot be re-used anyplace else, in my assesment this is what vfio
*is*, and the series I've been working on make it more obvious how
broad that statement really is.

> > In any event, it does look like today we'd expect the SPAPR stuff
> > would be done through the normal iommu APIs, perhaps enhanced a bit,
> > which makes me suspect an enhanced type1 can implement SPAPR.
> 
> David Gibson has argued for some time that SPAPR could be handled via a
> converged type1 model.  We has mapped that out at one point,
> essentially a "type2", but neither of us had any bandwidth to pursue it.

Cool! Well, let's put a pin in it, I don't think revising SPAPR should
be a pre-condition for anything here - but we can all agree than an
ideal world would be able to access SPAPR functionality from
devices/iommu and /dev/ioasid

And it would be nice to map this out enough to provide enough
preperation in the new /dev/ioasid uAPI. For instance I saw the only
SPAPR specific stuff in DPDK was to preset the IOVA range that the
IOASID would support. This would be trivial to add and may have
benifits to other IOMMUS by reducing the number of translation levels
or somethign.
 
> Right, but I don't see that implies it cannot work within the vfio
> IOMMU model.  Currently when an IOMMU is set, the /dev/vfio/vfio
> container becomes a conduit for file ops from the container to be
> forwarded to the IOMMU.  But that's in part because the user doesn't
> have another object to interact with the IOMMU.  It's entirely possible
> that with an ioasid shim, the user would continue to interact directly
> with the /dev/ioasid fd for IOMMU manipulation and only use
> VFIO_SET_IOMMU to associate a vfio container to that ioasid.

I am looking at this in two directions, the first is if we have
/dev/ioasid how do we connect it to VFIO? And here I aruge we need new
device IOCTLs and ideally a VFIO world that does not have a vestigial
container FD at all.

This is because /dev/ioasid will have to be multi-IOASID and it just
does not fit well into the VFIO IOMMU pluggable model at all - or at
least trying to make it fit will defeat the point of having it in the
first place.

This does not seem to be a big deal - the required device IOCTLs
should be small and having two code paths isn't going to be an
exploding complexity.

The second direction is how do we keep /dev/vfio/vfio entire uAPI
without duplicating a lot of code. There is where building a ioasid
back end or making ioasid == vfio are areas to look at.

> vfio really just wants to be able to attach groups to an address space
> to consider them isolated, everything else about the IOMMU API could
> happen via a new ioasid file descriptor representing that context, ie.
> vfio handles the group ownership and device access, ioasid handles the
> actual mappings.

Right, exactly.
 
> > Do we have container because the /dev/vfio/vfio can hold only a single
> > page table so we need to swap containers sometimes?
> 
> The container represents an IOMMU address space, which can be shared by
> multiple groups, where each group may contain one or more devices.
> Swapping a container would require releasing all the devices (the user
> cannot have access to a non-isolated device), then a group could be
> moved from one container to another.

So, basically, the answer is yes.

Having the container FD hold a single IOASID forced the group FD to
exist because we can't maintain the security object of a group in the
container FD if the work flow is to swap the container FD.

Here what I suggest is to merge the group security and the multiple
"IOMMU address space" concept into one FD. The /dev/ioasid would have
multiple page tables objects within it called IOASID'd and each IOASID
effectively represents what /dev/vfio/vfio does today.

We can assign any device joined to a /dev/ioasid FD to any IOASID inside
that FD, dynamically.

> > The security rule for isolation is that once a device is attached to a
> > /dev/ioasid fd then all other devices in that security group must be
> > attached to the same ioasid FD or left unused.
> 
> Sounds like a group...  Note also that if those other devices are not
> isolated from the user's device, the user could manipulate "unused"
> devices via DMA.  So even unused devices should be within the same
> IOMMU context... thus attaching groups to IOMMU domains.

That is a very interesting point. So, say, in the classic PCI bus
world if I have a NIC and HD on my PCI bus and both are in the group,
I assign the NIC to a /dev/ioasid & VFIO then it is possible to use
the NIC to access the HD via DMA

And here you want a

Re: [PATCH v2 1/4] dt-bindings: iommu: rockchip: Convert IOMMU to DT schema

2021-04-22 Thread Ezequiel Garcia
(Adding Kever)

Hi Benjamin,

Thanks a lot for working on this, it looks amazing. Together with the great work
that Rockchip is doing, it seems RK3566/RK3568 will have decent support very 
soon.

One comment here:

On Thu, 2021-04-22 at 16:15 +0200, Benjamin Gaignard wrote:
> Convert Rockchip IOMMU to DT schema
> 
> Signed-off-by: Benjamin Gaignard 
> ---
> version 2:
>  - Change maintainer
>  - Change reg maxItems
>  - Change interrupt maxItems
> 
>  .../bindings/iommu/rockchip,iommu.txt | 38 -
>  .../bindings/iommu/rockchip,iommu.yaml    | 79 +++
>  2 files changed, 79 insertions(+), 38 deletions(-)
>  delete mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>  create mode 100644 
> Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
> 
> diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt 
> b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
> deleted file mode 100644
> index 6ecefea1c6f9..
> --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
> +++ /dev/null
> @@ -1,38 +0,0 @@
> -Rockchip IOMMU
> -==
> -
> -A Rockchip DRM iommu translates io virtual addresses to physical addresses 
> for
> -its master device.  Each slave device is bound to a single master device, and
> -shares its clocks, power domain and irq.
> -
> -Required properties:
> -- compatible  : Should be "rockchip,iommu"
> -- reg : Address space for the configuration registers
> -- interrupts  : Interrupt specifier for the IOMMU instance
> -- interrupt-names : Interrupt name for the IOMMU instance
> -- #iommu-cells    : Should be <0>.  This indicates the iommu is a
> -    "single-master" device, and needs no additional 
> information
> -    to associate with its master device.  See:
> -    Documentation/devicetree/bindings/iommu/iommu.txt
> -- clocks  : A list of clocks required for the IOMMU to be accessible 
> by
> -    the host CPU.
> -- clock-names : Should contain the following:
> -   "iface" - Main peripheral bus clock (PCLK/HCL) (required)
> -   "aclk"  - AXI bus clock (required)
> -
> -Optional properties:
> -- rockchip,disable-mmu-reset : Don't use the mmu reset operation.
> -  Some mmu instances may produce unexpected 
> results
> -  when the reset operation is used.
> -
> -Example:
> -
> -   vopl_mmu: iommu@ff940300 {
> -   compatible = "rockchip,iommu";
> -   reg = <0xff940300 0x100>;
> -   interrupts = ;
> -   interrupt-names = "vopl_mmu";
> -   clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
> -   clock-names = "aclk", "iface";
> -   #iommu-cells = <0>;
> -   };
> diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml 
> b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
> new file mode 100644
> index ..0db208cf724a
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
> @@ -0,0 +1,79 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/iommu/rockchip,iommu.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: Rockchip IOMMU
> +
> +maintainers:
> +  - Heiko Stuebner 
> +
> +description: |+
> +  A Rockchip DRM iommu translates io virtual addresses to physical addresses 
> for
> +  its master device. Each slave device is bound to a single master device and
> +  shares its clocks, power domain and irq.
> +
> +  For information on assigning IOMMU controller to its peripheral devices,
> +  see generic IOMMU bindings.
> +
> +properties:
> +  compatible:
> +    const: rockchip,iommu
> +
> +  reg:
> +    minItems: 1
> +    maxItems: 2
> +
> +  interrupts:
> +    minItems: 1
> +    maxItems: 2
> +
> +  interrupt-names:
> +    minItems: 1
> +    maxItems: 2
> +

AFAICS, the driver supports handling multiple MMUs, and there's one reg and
interrupt cell for each MMU. IOW, there's no requirement that maxItems is 2.

Is there any way we can describe that? Or maybe just allow a bigger maximum?

Thanks,
Ezequiel

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH V4 05/18] iommu/ioasid: Redefine IOASID set and allocation APIs

2021-04-22 Thread Alex Williamson
On Wed, 21 Apr 2021 20:03:01 -0300
Jason Gunthorpe  wrote:

> On Wed, Apr 21, 2021 at 01:33:12PM -0600, Alex Williamson wrote:
> 
> > > I still expect that VFIO_GROUP_SET_CONTAINER will be used to connect
> > > /dev/{ioasid,vfio} to the VFIO group and all the group and device
> > > logic stays inside VFIO.  
> > 
> > But that group and device logic is also tied to the container, where
> > the IOMMU backend is the interchangeable thing that provides the IOMMU
> > manipulation for that container.  
> 
> I think that is an area where the discussion would need to be focused.
> 
> I don't feel very prepared to have it in details, as I haven't dug
> into all the group and iommu micro-operation very much.
> 
> But, it does seem like the security concept that VFIO is creating with
> the group also has to be present in the lower iommu layer too.
> 
> With different subsystems joining devices to the same ioasid's we
> still have to enforce the security propery the vfio group is creating.
> 
> > If you're using VFIO_GROUP_SET_CONTAINER to associate a group to a
> > /dev/ioasid, then you're really either taking that group outside of
> > vfio or you're re-implementing group management in /dev/ioasid.   
> 
> This sounds right.
> 
> > > Everything can be switched to ioasid_container all down the line. If
> > > it wasn't for PPC this looks fairly simple.  
> > 
> > At what point is it no longer vfio?  I'd venture to say that replacing
> > the container rather than invoking a different IOMMU backend is that
> > point.  
> 
> sorry, which is no longer vfio?

I'm suggesting that if we're replacing the container/group model with
an ioasid then we're effectively creating a new thing that really only
retains the vfio device uapi.

> > > Since getting rid of PPC looks a bit hard, we'd be stuck with
> > > accepting a /dev/ioasid and then immediately wrappering it in a
> > > vfio_container an shimming it through a vfio_iommu_ops. It is not
> > > ideal at all, but in my look around I don't see a major problem if
> > > type1 implementation is moved to live under /dev/ioasid.  
> > 
> > But type1 is \just\ an IOMMU backend, not "/dev/vfio".  Given that
> > nobody flinched at removing NVLink support, maybe just deprecate SPAPR
> > now and see if anyone objects ;)  
> 
> Would simplify this project, but I wonder :)
> 
> In any event, it does look like today we'd expect the SPAPR stuff
> would be done through the normal iommu APIs, perhaps enhanced a bit,
> which makes me suspect an enhanced type1 can implement SPAPR.

David Gibson has argued for some time that SPAPR could be handled via a
converged type1 model.  We has mapped that out at one point,
essentially a "type2", but neither of us had any bandwidth to pursue it.

> I say this because the SPAPR looks quite a lot like PASID when it has
> APIs for allocating multiple tables and other things. I would be
> interested to hear someone from IBM talk about what it is doing and
> how it doesn't fit into today's IOMMU API.

[Cc David, Alexey]

> It is very old and the iommu world has advanced tremendously lately,
> maybe I'm too optimisitic?
> 
> > > We end up with a ioasid.h that basically has the vfio_iommu_type1 code
> > > lightly recast into some 'struct iommu_container' and a set of
> > > ioasid_* function entry points that follow vfio_iommu_driver_ops_type1:
> > >   ioasid_attach_group
> > >   ioasid_detatch_group
> > >   ioasid_
> > >   ioasid_read/ioasid_write  
> > 
> > Again, this looks like a vfio IOMMU backend.  What are we accomplishing
> > by replacing /dev/vfio with /dev/ioasid versus some manipulation of
> > VFIO_SET_IOMMU accepting a /dev/ioasid fd?  
> 
> The point of all of this is to make the user api for the IOMMU
> cross-subsystem. It is not a vfio IOMMU backend, it is moving the
> IOMMU abstraction from VFIO into the iommu framework and giving the
> iommu framework a re-usable user API.

Right, but I don't see that implies it cannot work within the vfio
IOMMU model.  Currently when an IOMMU is set, the /dev/vfio/vfio
container becomes a conduit for file ops from the container to be
forwarded to the IOMMU.  But that's in part because the user doesn't
have another object to interact with the IOMMU.  It's entirely possible
that with an ioasid shim, the user would continue to interact directly
with the /dev/ioasid fd for IOMMU manipulation and only use
VFIO_SET_IOMMU to associate a vfio container to that ioasid.

> My ideal outcome would be for VFIO to use only the new iommu/ioasid
> API and have no iommu pluggability at all. The iommu subsystem
> provides everything needed to VFIO, and provides it equally to VDPA
> and everything else.

As above, we don't necessarily need to have the vfio container be the
access mechanism for the IOMMU, it can become just an means to
association the container with an IOMMU.  This has quite a few
transitional benefits.

> drivers/vfio/ becomes primarily about 'struct vfio_device' and
> everything related to its IOCTL interface.
> 
> dr

Re: [PATCH v14 00/13] SMMUv3 Nested Stage Setup (IOMMU part)

2021-04-22 Thread Sumit Gupta
Hi Eric,
I have validated the v14 of the patch series from branch 
"jean_sva_current_2stage_v14".
Verfied nested translations with NVMe PCI device assigned to Qemu 5.2 Guest.
Had to revert patch "mm: notify remote TLBs when dirtying a PTE".

Tested-by: Sumit Gupta 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 1/4] dt-bindings: iommu: rockchip: Convert IOMMU to DT schema

2021-04-22 Thread Benjamin Gaignard
Convert Rockchip IOMMU to DT schema

Signed-off-by: Benjamin Gaignard 
---
version 2:
 - Change maintainer
 - Change reg maxItems
 - Change interrupt maxItems

 .../bindings/iommu/rockchip,iommu.txt | 38 -
 .../bindings/iommu/rockchip,iommu.yaml| 79 +++
 2 files changed, 79 insertions(+), 38 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
 create mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml

diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt 
b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
deleted file mode 100644
index 6ecefea1c6f9..
--- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-Rockchip IOMMU
-==
-
-A Rockchip DRM iommu translates io virtual addresses to physical addresses for
-its master device.  Each slave device is bound to a single master device, and
-shares its clocks, power domain and irq.
-
-Required properties:
-- compatible  : Should be "rockchip,iommu"
-- reg : Address space for the configuration registers
-- interrupts  : Interrupt specifier for the IOMMU instance
-- interrupt-names : Interrupt name for the IOMMU instance
-- #iommu-cells: Should be <0>.  This indicates the iommu is a
-"single-master" device, and needs no additional information
-to associate with its master device.  See:
-Documentation/devicetree/bindings/iommu/iommu.txt
-- clocks  : A list of clocks required for the IOMMU to be accessible by
-the host CPU.
-- clock-names : Should contain the following:
-   "iface" - Main peripheral bus clock (PCLK/HCL) (required)
-   "aclk"  - AXI bus clock (required)
-
-Optional properties:
-- rockchip,disable-mmu-reset : Don't use the mmu reset operation.
-  Some mmu instances may produce unexpected results
-  when the reset operation is used.
-
-Example:
-
-   vopl_mmu: iommu@ff940300 {
-   compatible = "rockchip,iommu";
-   reg = <0xff940300 0x100>;
-   interrupts = ;
-   interrupt-names = "vopl_mmu";
-   clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
-   clock-names = "aclk", "iface";
-   #iommu-cells = <0>;
-   };
diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml 
b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
new file mode 100644
index ..0db208cf724a
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/rockchip,iommu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip IOMMU
+
+maintainers:
+  - Heiko Stuebner 
+
+description: |+
+  A Rockchip DRM iommu translates io virtual addresses to physical addresses 
for
+  its master device. Each slave device is bound to a single master device and
+  shares its clocks, power domain and irq.
+
+  For information on assigning IOMMU controller to its peripheral devices,
+  see generic IOMMU bindings.
+
+properties:
+  compatible:
+const: rockchip,iommu
+
+  reg:
+minItems: 1
+maxItems: 2
+
+  interrupts:
+minItems: 1
+maxItems: 2
+
+  interrupt-names:
+minItems: 1
+maxItems: 2
+
+  clocks:
+items:
+  - description: Core clock
+  - description: Interface clock
+
+  clock-names:
+items:
+  - const: aclk
+  - const: iface
+
+  "#iommu-cells":
+const: 0
+
+  rockchip,disable-mmu-reset:
+$ref: /schemas/types.yaml#/definitions/flag
+description: |
+  Do not use the mmu reset operation.
+  Some mmu instances may produce unexpected results
+  when the reset operation is used.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - "#iommu-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+#include 
+#include 
+
+vopl_mmu: iommu@ff940300 {
+  compatible = "rockchip,iommu";
+  reg = <0xff940300 0x100>;
+  interrupts = ;
+  interrupt-names = "vopl_mmu";
+  clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
+  clock-names = "aclk", "iface";
+  #iommu-cells = <0>;
+};
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 2/4] dt-bindings: iommu: rockchip: Add compatible for v2

2021-04-22 Thread Benjamin Gaignard
Add compatible for the second version of IOMMU hardware block.
RK356x IOMMU can also be link to a power domain.

Signed-off-by: Benjamin Gaignard 
---
version 2:
 - Add power-domains property

 .../devicetree/bindings/iommu/rockchip,iommu.yaml  | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml 
b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
index 0db208cf724a..e54353ccd1ec 100644
--- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
@@ -19,7 +19,9 @@ description: |+
 
 properties:
   compatible:
-const: rockchip,iommu
+enum:
+  - rockchip,iommu
+  - rockchip,iommu-v2
 
   reg:
 minItems: 1
@@ -46,6 +48,9 @@ properties:
   "#iommu-cells":
 const: 0
 
+  power-domains:
+maxItems: 1
+
   rockchip,disable-mmu-reset:
 $ref: /schemas/types.yaml#/definitions/flag
 description: |
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 3/4] ARM: dts: rockchip: rk322x: Fix iommu-cells properties name

2021-04-22 Thread Benjamin Gaignard
Add '#" to iommu-cells properties

Signed-off-by: Benjamin Gaignard 
---
 arch/arm/boot/dts/rk322x.dtsi | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi
index a4dd50aaf3fc..8af2e9288029 100644
--- a/arch/arm/boot/dts/rk322x.dtsi
+++ b/arch/arm/boot/dts/rk322x.dtsi
@@ -564,7 +564,7 @@ vpu_mmu: iommu@20020800 {
interrupt-names = "vpu_mmu";
clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>;
clock-names = "aclk", "iface";
-   iommu-cells = <0>;
+   #iommu-cells = <0>;
status = "disabled";
};
 
@@ -575,7 +575,7 @@ vdec_mmu: iommu@20030480 {
interrupt-names = "vdec_mmu";
clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>;
clock-names = "aclk", "iface";
-   iommu-cells = <0>;
+   #iommu-cells = <0>;
status = "disabled";
};
 
@@ -629,7 +629,7 @@ iep_mmu: iommu@20070800 {
interrupt-names = "iep_mmu";
clocks = <&cru ACLK_IEP>, <&cru HCLK_IEP>;
clock-names = "aclk", "iface";
-   iommu-cells = <0>;
+   #iommu-cells = <0>;
status = "disabled";
};
 
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 4/4] iommu: rockchip: Add support iommu v2

2021-04-22 Thread Benjamin Gaignard
From: Simon Xue 

RK356x SoC got new IOMMU hardware block (version 2).
Add a compatible to distinguish it from the first version.

Signed-off-by: Simon Xue 
[Benjamin]
- port driver from kernel 4.19 to 5.12
- change functions prototype.
- squash all fixes in this commit.
Signed-off-by: Benjamin Gaignard 
---
 drivers/iommu/rockchip-iommu.c | 422 +++--
 1 file changed, 406 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index e5d86b7177de..0d345d0f44fc 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -81,6 +82,30 @@
   */
 #define RK_IOMMU_PGSIZE_BITMAP 0x007ff000
 
+#define DT_LO_MASK 0xf000
+#define DT_HI_MASK GENMASK_ULL(39, 32)
+#define DT_SHIFT   28
+
+#define DTE_BASE_HI_MASK GENMASK(11, 4)
+
+#define PAGE_DESC_LO_MASK   0xf000
+#define PAGE_DESC_HI1_LOWER 32
+#define PAGE_DESC_HI1_UPPER 35
+#define PAGE_DESC_HI2_LOWER 36
+#define PAGE_DESC_HI2_UPPER 39
+#define PAGE_DESC_HI_MASK1  GENMASK_ULL(PAGE_DESC_HI1_UPPER, 
PAGE_DESC_HI1_LOWER)
+#define PAGE_DESC_HI_MASK2  GENMASK_ULL(PAGE_DESC_HI2_UPPER, 
PAGE_DESC_HI2_LOWER)
+
+#define DTE_HI1_LOWER 8
+#define DTE_HI1_UPPER 11
+#define DTE_HI2_LOWER 4
+#define DTE_HI2_UPPER 7
+#define DTE_HI_MASK1  GENMASK(DTE_HI1_UPPER, DTE_HI1_LOWER)
+#define DTE_HI_MASK2  GENMASK(DTE_HI2_UPPER, DTE_HI2_LOWER)
+
+#define PAGE_DESC_HI_SHIFT1 (PAGE_DESC_HI1_LOWER - DTE_HI1_LOWER)
+#define PAGE_DESC_HI_SHIFT2 (PAGE_DESC_HI2_LOWER - DTE_HI2_LOWER)
+
 struct rk_iommu_domain {
struct list_head iommus;
u32 *dt; /* page directory table */
@@ -91,6 +116,10 @@ struct rk_iommu_domain {
struct iommu_domain domain;
 };
 
+struct rockchip_iommu_data {
+   u32 version;
+};
+
 /* list of clocks required by IOMMU */
 static const char * const rk_iommu_clocks[] = {
"aclk", "iface",
@@ -108,6 +137,7 @@ struct rk_iommu {
struct list_head node; /* entry in rk_iommu_domain.iommus */
struct iommu_domain *domain; /* domain to which iommu is attached */
struct iommu_group *group;
+   u32 version;
 };
 
 struct rk_iommudata {
@@ -174,11 +204,32 @@ static struct rk_iommu_domain *to_rk_domain(struct 
iommu_domain *dom)
 #define RK_DTE_PT_ADDRESS_MASK0xf000
 #define RK_DTE_PT_VALID   BIT(0)
 
+/*
+ * In v2:
+ * 31:12 - PT address bit 31:0
+ * 11: 8 - PT address bit 35:32
+ *  7: 4 - PT address bit 39:36
+ *  3: 1 - Reserved
+ * 0 - 1 if PT @ PT address is valid
+ */
+#define RK_DTE_PT_ADDRESS_MASK_V2 0xfff0
+
 static inline phys_addr_t rk_dte_pt_address(u32 dte)
 {
return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK;
 }
 
+static inline phys_addr_t rk_dte_pt_address_v2(u32 dte)
+{
+   u64 dte_v2 = dte;
+
+   dte_v2 = ((dte_v2 & DTE_HI_MASK2) << PAGE_DESC_HI_SHIFT2) |
+((dte_v2 & DTE_HI_MASK1) << PAGE_DESC_HI_SHIFT1) |
+(dte_v2 & PAGE_DESC_LO_MASK);
+
+   return (phys_addr_t)dte_v2;
+}
+
 static inline bool rk_dte_is_pt_valid(u32 dte)
 {
return dte & RK_DTE_PT_VALID;
@@ -189,6 +240,15 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma)
return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
 }
 
+static inline u32 rk_mk_dte_v2(dma_addr_t pt_dma)
+{
+   pt_dma = (pt_dma & PAGE_DESC_LO_MASK) |
+((pt_dma & PAGE_DESC_HI_MASK1) >> PAGE_DESC_HI_SHIFT1) |
+(pt_dma & PAGE_DESC_HI_MASK2) >> PAGE_DESC_HI_SHIFT2;
+
+   return (pt_dma & RK_DTE_PT_ADDRESS_MASK_V2) | RK_DTE_PT_VALID;
+}
+
 /*
  * Each PTE has a Page address, some flags and a valid bit:
  * +-+---+---+-+
@@ -215,11 +275,37 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma)
 #define RK_PTE_PAGE_READABLE  BIT(1)
 #define RK_PTE_PAGE_VALID BIT(0)
 
+/*
+ * In v2:
+ * 31:12 - Page address bit 31:0
+ *  11:9 - Page address bit 34:32
+ *   8:4 - Page address bit 39:35
+ * 3 - Security
+ * 2 - Readable
+ * 1 - Writable
+ * 0 - 1 if Page @ Page address is valid
+ */
+#define RK_PTE_PAGE_ADDRESS_MASK_V2  0xfff0
+#define RK_PTE_PAGE_FLAGS_MASK_V20x000e
+#define RK_PTE_PAGE_READABLE_V2  BIT(2)
+#define RK_PTE_PAGE_WRITABLE_V2  BIT(1)
+
 static inline phys_addr_t rk_pte_page_address(u32 pte)
 {
return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK;
 }
 
+static inline phys_addr_t rk_pte_page_address_v2(u32 pte)
+{
+   u64 pte_v2 = pte;
+
+   pte_v2 = ((pte_v2 & DTE_HI_MASK2) << PAGE_DESC_HI_SHIFT2) |
+((pte_v2 & DTE_HI_MASK1) << PAGE_DESC_HI_SHIFT1) |
+(pte_v2 & PAGE_DESC_LO_MASK);
+
+   return (phys_addr_t)pte_v2;
+}
+
 static inline bool rk_pte_is_page_valid(u32 pte)
 {
return pte & RK_PTE_PAGE_VALID;
@@ -229,12 +315,26 @@ static inline bool rk_pte_is_page_valid(u32 pte)
 static u32 rk_mk_pte(phys_addr_t page, int prot)

[PATCH v2 0/4] Add driver for rk356x

2021-04-22 Thread Benjamin Gaignard
This series adds the IOMMU driver for rk356x SoC.
Since a new compatible is needed to distinguish this second version of 
IOMMU hardware block from the first one, it is an opportunity to convert
the binding to DT schema.

version 2:
 - Fix iommu-cells typo in rk322x.dtsi
 - Change maintainer
 - Change reg maxItems
 - Add power-domains property

Benjamin Gaignard (3):
  dt-bindings: iommu: rockchip: Convert IOMMU to DT schema
  dt-bindings: iommu: rockchip: Add compatible for v2
  ARM: dts: rockchip: rk322x: Fix iommu-cells properties name

Simon Xue (1):
  iommu: rockchip: Add support iommu v2

 .../bindings/iommu/rockchip,iommu.txt |  38 --
 .../bindings/iommu/rockchip,iommu.yaml|  84 
 arch/arm/boot/dts/rk322x.dtsi |   6 +-
 drivers/iommu/rockchip-iommu.c| 422 +-
 4 files changed, 493 insertions(+), 57 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
 create mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml

-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/3] dt-bindings: iommu: rockchip: Convert IOMMU to DT schema

2021-04-22 Thread Johan Jonker
Hi Benjamin,

Please check robh/dtbs-check failed build log at

https://patchwork.ozlabs.org/project/devicetree-bindings/patch/20210422072442.111070-2-benjamin.gaign...@collabora.com/

make ARCH=arm64 dt_binding_check
DT_SCHEMA_FILES=Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml


make ARCH=arm dtbs_check
DT_SCHEMA_FILES=Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml

make ARCH=arm64 dtbs_check
DT_SCHEMA_FILES=Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml

Test if all notifications are gone.

===

YAML also checks extra properties like "power-domains" not yet included
but needed for rk3568.
Add them in a separate patch.

===

rk3229-evb.dt.yaml: iommu@20030480: 'iommu-cells' does not match any of
the regexes

Change a rk322x.dtsi property to #iommu-cells in a separate patch.

===

rk3229-xms6.dt.yaml: iommu@20030480: reg: [[537068672, 64], [537068736,
64]] is too long

Change reg minItems maxItems.

===

Johan

On 4/22/21 9:24 AM, Benjamin Gaignard wrote:
> Convert Rockchip IOMMU to DT schema
> 
> Signed-off-by: Benjamin Gaignard 
> ---
>  .../bindings/iommu/rockchip,iommu.txt | 38 --
>  .../bindings/iommu/rockchip,iommu.yaml| 76 +++
>  2 files changed, 76 insertions(+), 38 deletions(-)
>  delete mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>  create mode 100644 
> Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
> 
> diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt 
> b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
> deleted file mode 100644
> index 6ecefea1c6f9..
> --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
> +++ /dev/null
> @@ -1,38 +0,0 @@
> -Rockchip IOMMU
> -==
> -
> -A Rockchip DRM iommu translates io virtual addresses to physical addresses 
> for
> -its master device.  Each slave device is bound to a single master device, and
> -shares its clocks, power domain and irq.
> -
> -Required properties:
> -- compatible  : Should be "rockchip,iommu"
> -- reg : Address space for the configuration registers
> -- interrupts  : Interrupt specifier for the IOMMU instance
> -- interrupt-names : Interrupt name for the IOMMU instance
> -- #iommu-cells: Should be <0>.  This indicates the iommu is a
> -"single-master" device, and needs no additional 
> information
> -to associate with its master device.  See:
> -Documentation/devicetree/bindings/iommu/iommu.txt
> -- clocks  : A list of clocks required for the IOMMU to be accessible 
> by
> -the host CPU.
> -- clock-names : Should contain the following:
> - "iface" - Main peripheral bus clock (PCLK/HCL) (required)
> - "aclk"  - AXI bus clock (required)
> -
> -Optional properties:
> -- rockchip,disable-mmu-reset : Don't use the mmu reset operation.
> -Some mmu instances may produce unexpected results
> -when the reset operation is used.
> -
> -Example:
> -
> - vopl_mmu: iommu@ff940300 {
> - compatible = "rockchip,iommu";
> - reg = <0xff940300 0x100>;
> - interrupts = ;
> - interrupt-names = "vopl_mmu";
> - clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
> - clock-names = "aclk", "iface";
> - #iommu-cells = <0>;
> - };
> diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml 
> b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
> new file mode 100644
> index ..ab128f8e4c73
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
> @@ -0,0 +1,76 @@

> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)

GPL-2.0
This is a conversion of an existing document.

> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/iommu/rockchip,iommu.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: Rockchip IOMMU
> +
> +maintainers:
> +  - Simon Xue 

  - Heiko Stuebner 

Add someone that can respond in a short time in case rob+dt wants to
delete something.

> +
> +description: |+
> +  A Rockchip DRM iommu translates io virtual addresses to physical addresses 
> for

> +  its master device. Each slave device is bound to a single master device, 
> and

No comma "," before "and"

> +  shares its clocks, power domain and irq.
> +
> +  For information on assigning IOMMU controller to its peripheral devices,
> +  see generic IOMMU bindings.
> +
> +properties:
> +  compatible:
> +const: rockchip,iommu
> +
> +  reg:

> +maxItems: 1

minItems: 1
maxItems: 2


> +
> +  interrupts:
> +maxItems: 1
> +
> +  interrupt-names:
> +maxItems: 1
> +
> +  clocks:
> +items:
> +  - description: Core clock
> +  - description: Interface clock
> +
> +  clock-names:
> +items:
> +  - const: aclk
> +  - const: i

[PATCH] iommu/amd: Add amd_iommu=force_enable option

2021-04-22 Thread Joerg Roedel
From: Joerg Roedel 

Add this option to enable the IOMMU on platforms like AMD Stoney,
where the kernel usually disables it because it may cause problems in
some scenarios.

Signed-off-by: Joerg Roedel 
---
 Documentation/admin-guide/kernel-parameters.txt | 3 +++
 drivers/iommu/amd/init.c| 7 +++
 2 files changed, 10 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 04545725f187..c9573f726707 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -303,6 +303,9 @@
  allowed anymore to lift isolation
  requirements as needed. This option
  does not override iommu=pt
+   force_enable - Force enable the IOMMU on platforms known
+  to be buggy with IOMMU enabled. Use this
+  option with care.
 
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 321f5906e6ed..3e2295d3b3e2 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -155,6 +155,7 @@ static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
 
 static bool amd_iommu_detected;
 static bool __initdata amd_iommu_disabled;
+static bool __initdata amd_iommu_force_enable;
 static int amd_iommu_target_ivhd_type;
 
 u16 amd_iommu_last_bdf;/* largest PCI device id we have
@@ -2882,6 +2883,9 @@ static bool detect_ivrs(void)
 
acpi_put_table(ivrs_base);
 
+   if (amd_iommu_force_enable)
+   goto out;
+
/* Don't use IOMMU if there is Stoney Ridge graphics */
for (i = 0; i < 32; i++) {
u32 pci_id;
@@ -2893,6 +2897,7 @@ static bool detect_ivrs(void)
}
}
 
+out:
/* Make sure ACS will be enabled during PCI probe */
pci_request_acs();
 
@@ -3148,6 +3153,8 @@ static int __init parse_amd_iommu_options(char *str)
for (; *str; ++str) {
if (strncmp(str, "fullflush", 9) == 0)
amd_iommu_unmap_flush = true;
+   if (strncmp(str, "force_enable", 12) == 0)
+   amd_iommu_force_enable = true;
if (strncmp(str, "off", 3) == 0)
amd_iommu_disabled = true;
if (strncmp(str, "force_isolation", 15) == 0)
-- 
2.31.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH V4 05/18] iommu/ioasid: Redefine IOASID set and allocation APIs

2021-04-22 Thread Liu Yi L
On Wed, 21 Apr 2021 13:33:12 -0600, Alex Williamson wrote:

> On Wed, 21 Apr 2021 14:52:03 -0300
> Jason Gunthorpe  wrote:
> 
> > On Wed, Apr 21, 2021 at 10:54:51AM -0600, Alex Williamson wrote:
> >   
> > > That's essentially replacing vfio-core, where I think we're more
> > 
> > I am only talking about /dev/vfio here which is basically the IOMMU
> > interface part.
> > 
> > I still expect that VFIO_GROUP_SET_CONTAINER will be used to connect
> > /dev/{ioasid,vfio} to the VFIO group and all the group and device
> > logic stays inside VFIO.  
> 
> But that group and device logic is also tied to the container, where
> the IOMMU backend is the interchangeable thing that provides the IOMMU
> manipulation for that container.  If you're using
> VFIO_GROUP_SET_CONTAINER to associate a group to a /dev/ioasid, then
> you're really either taking that group outside of vfio or you're
> re-implementing group management in /dev/ioasid.  I'd expect the
> transition point at VFIO_SET_IOMMU.

per my understanding, transiting at the VFIO_SET_IOMMU point makes more
sense as VFIO can still have the group and device logic, which is the key
concept of group granularity isolation for userspace direct access.

-- 
Regards,
Yi Liu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH V4 05/18] iommu/ioasid: Redefine IOASID set and allocation APIs

2021-04-22 Thread Jason Gunthorpe
On Thu, Apr 22, 2021 at 08:34:32AM +, Tian, Kevin wrote:

> The shim layer could be considered as a new iommu backend in VFIO,
> which connects VFIO iommu ops to the internal helpers in
> drivers/ioasid.

It may be the best we can do because of SPAPR, but the ideal outcome
should be to remove the entire pluggable IOMMU stuff from vfio
entirely and have it only use /dev/ioasid

We should never add another pluggable IOMMU type to vfio - everything
should be done through drives/iommu now that it is much more capable.

> Another tricky thing is that a container may be linked to multiple iommu
> domains in VFIO, as devices in the container may locate behind different
> IOMMUs with inconsistent capability (commit 1ef3e2bc). 

Frankly this sounds over complicated. I would think /dev/ioasid should
select the IOMMU when the first device is joined, and all future joins
must be compatible with the original IOMMU - ie there is only one set
of IOMMU capabilities in a /dev/ioasid.

This means qemue might have multiple /dev/ioasid's if the system has
multiple incompatible IOMMUs (is this actually a thing?) The platform
should design its IOMMU domains to minimize the number of
/dev/ioasid's required.

Is there a reason we need to share IOASID'd between completely
divergance IOMMU implementations? I don't expect the HW should be able
to physically share page tables??

That decision point alone might be the thing that just says we can't
ever have /dev/vfio/vfio == /dev/ioasid

> Just to confirm. Above flow is for current map/unmap flavor as what
> VFIO/vDPA do today. Later when nested translation is supported,
> there is no need to detach gpa_ioasid_fd. Instead, a new cmd will
> be introduced to nest rid_ioasid_fd on top of gpa_ioasid_fd:

Sure.. The tricky bit will be to define both of the common nested
operating modes.

  nested_ioasid = ioctl(ioasid_fd, CREATE_NESTED_IOASID,  gpa_ioasid_id);
  ioctl(ioasid_fd, SET_NESTED_IOASID_PAGE_TABLES, nested_ioasid, ..)

   // IOMMU will match on the device RID, no PASID:
  ioctl(vfio_device, ATTACH_IOASID, nested_ioasid);

   // IOMMU will match on the device RID and PASID:
  ioctl(vfio_device, ATTACH_IOASID_PASID, pasid, nested_ioasid);

Notice that ATTACH (or bind, whatever) is always done on the
vfio_device FD. ATTACH tells the IOMMU HW to link the PCI BDF&PASID to
a specific page table defined by an IOASID.

I expect we have many flavours of IOASID tables, eg we have normal,
and 'nested with table controlled by hypervisor'. ARM has 'nested with
table controlled by guest' right? So like this?

  nested_ioasid = ioctl(ioasid_fd, CREATE_DELGATED_IOASID,
   gpa_ioasid_id, )
  // PASID now goes to 
  ioctl(vfio_device, ATTACH_IOASID_PASID, pasid, nested_ioasid);

Where  is some internal to the guest handle of the viommu
page table scoped within gpa_ioasid_id? Like maybe it is GPA of the
base of the page table?

The guest can't select its own PASIDs without telling the hypervisor,
right?

> I also feel hiding group from uAPI is a good thing and is interested in
> the rationale behind for explicitly managing group in vfio (which is
> essentially the same boundary as provided by iommu group), e.g. for 
> better user experience when group security is broken? 

Indeed, I can see how things might have just evolved into this, but if
it has a purpose it seems pretty hidden.
we need it or not seems pretty hidden.

Jason
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/amd: Clear DMA ops when switching domain

2021-04-22 Thread Jean-Philippe Brucker
Since commit 08a27c1c3ecf ("iommu: Add support to change default domain
of an iommu group") a user can switch a device between IOMMU and direct
DMA through sysfs. This doesn't work for AMD IOMMU at the moment because
dev->dma_ops is not cleared when switching from a DMA to an identity
IOMMU domain. The DMA layer thus attempts to use the dma-iommu ops on an
identity domain, causing an oops:

  # echo :00:05.0 > /sys/sys/bus/pci/drivers/e1000e/unbind
  # echo identity > /sys/bus/pci/devices/:00:05.0/iommu_group/type
  # echo :00:05.0 > /sys/sys/bus/pci/drivers/e1000e/bind
   ...
  BUG: kernel NULL pointer dereference, address: 0028
   ...
   Call Trace:
iommu_dma_alloc
e1000e_setup_tx_resources
e1000e_open

Since iommu_change_dev_def_domain() calls probe_finalize() again, clear
the dma_ops there like Vt-d does.

Fixes: 08a27c1c3ecf ("iommu: Add support to change default domain of an iommu 
group")
Signed-off-by: Jean-Philippe Brucker 

---
It could be factored into iommu_setup_dma_ops(), but this is easier to
backport and less likely to break other platforms. Since I need the same
for virtio-iommu, I'll do the factoring there.

My previous attempt at fixing this by implementing
arch_teardown_dma_ops() [1] was misguided. It's how arm64 deals with the
problem but it cannot reliably work on x86, because there the DMA ops
are set on device add, not on driver bind.

Thankfully Boris reported a regression on his test box and dropped that
patch [2]. Although I couldn't reproduce it I'm guessing what happens
is:

* probe_finalize(), called from device_add() or bus_set_iommu() sets up
  the DMA ops.
* Some driver, possibly ata_generic, probes the device and returns
  -ENODEV. arch_teardown_dma_ops() clears the DMA ops.
* Another driver probes the device and starts DMA. Now the direct DMA
  ops are used even though IOMMU protection is active, DMA faults.

[1] 
https://lore.kernel.org/linux-iommu/20210414082633.877461-1-jean-phili...@linaro.org/
[2] https://lore.kernel.org/lkml/20210417120644.ga5...@zn.tnic/
---
 drivers/iommu/amd/iommu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index a69a8b573e40..7de7a260706b 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1747,6 +1747,8 @@ static void amd_iommu_probe_finalize(struct device *dev)
domain = iommu_get_domain_for_dev(dev);
if (domain->type == IOMMU_DOMAIN_DMA)
iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0);
+   else
+   set_dma_ops(dev, NULL);
 }
 
 static void amd_iommu_release_device(struct device *dev)
-- 
2.31.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH V4 05/18] iommu/ioasid: Redefine IOASID set and allocation APIs

2021-04-22 Thread Tian, Kevin
> From: Jason Gunthorpe 
> Sent: Thursday, April 22, 2021 7:03 AM
> 
> > The pseudo code above really suggests you do want to remove
> > /dev/vfio/vfio, but this is only one of the IOMMU backends for vfio, so
> > I can't quite figure out if we're talking past each other.
> 
> I'm not quite sure what you mean by "one of the IOMMU backends?" You
> mean type1, right?

I think Alex meant that type1 is one of the IOMMU backends in VFIO (type1,
type1v2, tce, tce_v2, noiommu, etc.) which are all configured through 
/dev/vfio/vfio. If we are just moving type1 to /dev/ioasid, the justification
is not sufficient by replacing /dev/vfio/vfio with /dev/ioasid, at least in
this transition phase (before all iommu bits are consolidated in /dev/ioasid
in your ideal outcome).

> 
> > As I expressed in another thread, type1 has a lot of shortcomings.  The
> > mapping interface leaves userspace trying desperately to use statically
> > mapped buffers because the map/unmap latency is too high.  We have
> > horrible issues with duplicate locked page accounting across
> > containers.  It suffers pretty hard from feature creep in various
> > areas.  A new IOMMU backend is an opportunity to redesign some of these
> > things.
> 
> Sure, but also those kinds of transformational things go alot better
> if you can smoothly go from the old to the new and have technical
> co-existance in side the kernel. Having a shim that maps the old APIs
> to new APIs internally to Linux helps keep the implementation from
> becoming too bogged down with compatibility.

The shim layer could be considered as a new iommu backend in VFIO,
which connects VFIO iommu ops to the internal helpers in drivers/ioasid.
In this case then we don't need to replicate the VFIO uAPI through
/dev/ioasid. Instead the new interface just supports new uAPI. An old
VFIO userspace still opens /dev/vfio/vfio to conduct iommu operations
which implicitly goes to drivers/ioasid. A new VFIO userspace uses 
/dev/vfio/vfio to join ioasid_fd and then use new uAPIs through /dev/
ioasid to manage iommu pgtables, as you described below.

> 
> > The IOMMU group also abstracts isolation and visibility relative to
> > DMA.  For example, in a PCIe topology a multi-function device may not
> > have isolation between functions, but each requester ID is visible to
> > the IOMMU.
> 
> Okay, I'm glad I have this all right in my head, as I was pretty sure
> this was what the group was about.
> 
> My next question is why do we have three things as a FD: group, device
> and container (aka IOMMU interface)?
> 
> Do we have container because the /dev/vfio/vfio can hold only a single
> page table so we need to swap containers sometimes?

Yes, one container can hold only a single page table. When vIOMMU is
exposed, VFIO requires each device/group in different containers to
support per-device address space (before nested translation is supported),
which is switched between GPA and gIOVA when bypass mode is turned 
on/off for a given device.

Another tricky thing is that a container may be linked to multiple iommu
domains in VFIO, as devices in the container may locate behind different
IOMMUs with inconsistent capability (commit 1ef3e2bc). In this case 
more accurately one container can hold a single address space, which could
be replayed into multiple page tables (with exact same mappings). I'm not
sure whether this is something that could be simplified (or not supported)
in the new interface. In the end each pgtable operation is per iommu domain
in the iommu layer. I wonder where we want to maintain the relationship
between the ioasid_fd and associated iommu domains.

> 
> If we start from a clean sheet and make a sketch..
> 
> /dev/ioasid is the IOMMU control interface. It can create multiple
> IOASIDs that have page tables and it can manipulate those page tables.
> Each IOASID is identified by some number.
> 
> struct vfio_device/vdpa_device/etc are consumers of /dev/ioasid
> 
> When a device attaches to an ioasid userspace gives VFIO/VDPA the
> ioasid FD and the ioasid # in the FD.
> 
> The security rule for isolation is that once a device is attached to a
> /dev/ioasid fd then all other devices in that security group must be
> attached to the same ioasid FD or left unused.
> 
> Thus /dev/ioasid also becomes the unit of security and the IOMMU
> subsystem level becomes aware of and enforces the group security
> rules. Userspace does not need to "see" the group
> 
> In sketch it would be like
>   ioasid_fd = open("/dev/ioasid");
>   vfio_device_fd = open("/dev/vfio/device0")
>   vdpa_device_fd = open("/dev/vdpa/device0")
>   ioctl(vifo_device_fd, JOIN_IOASID_FD, ioasifd)
>   ioctl(vdpa_device_fd, JOIN_IOASID_FD, ioasifd)
> 
>   gpa_ioasid_id = ioctl(ioasid_fd, CREATE_IOASID, ..)
>   ioctl(ioasid_fd, SET_IOASID_PAGE_TABLES, ..)
> 
>   ioctl(vfio_device, ATTACH_IOASID, gpa_ioasid_id)
>   ioctl(vpda_device, ATTACH_IOASID, gpa_ioasid_id)
> 
>   .. both VDPA and VFIO see the guest physical map and the kernel

Re: [PATCH v4 00/14] Restricted DMA

2021-04-22 Thread Claire Chang
v5 here: https://lore.kernel.org/patchwork/cover/1416899/ to rebase
onto Christoph's swiotlb cleanups.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 16/16] of: Add plumbing for restricted DMA pool

2021-04-22 Thread Claire Chang
If a device is not behind an IOMMU, we look up the device node and set
up the restricted DMA when the restricted-dma-pool is presented.

Signed-off-by: Claire Chang 
---
 drivers/of/address.c| 25 +
 drivers/of/device.c |  3 +++
 drivers/of/of_private.h |  5 +
 3 files changed, 33 insertions(+)

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 54f221dde267..fff3adfe4986 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1109,6 +1110,30 @@ bool of_dma_is_coherent(struct device_node *np)
 }
 EXPORT_SYMBOL_GPL(of_dma_is_coherent);
 
+int of_dma_set_restricted_buffer(struct device *dev)
+{
+   struct device_node *node;
+   int count, i;
+
+   if (!dev->of_node)
+   return 0;
+
+   count = of_property_count_elems_of_size(dev->of_node, "memory-region",
+   sizeof(phandle));
+   for (i = 0; i < count; i++) {
+   node = of_parse_phandle(dev->of_node, "memory-region", i);
+   /* There might be multiple memory regions, but only one
+* restriced-dma-pool region is allowed.
+*/
+   if (of_device_is_compatible(node, "restricted-dma-pool") &&
+   of_device_is_available(node))
+   return of_reserved_mem_device_init_by_idx(
+   dev, dev->of_node, i);
+   }
+
+   return 0;
+}
+
 /**
  * of_mmio_is_nonposted - Check if device uses non-posted MMIO
  * @np:device node
diff --git a/drivers/of/device.c b/drivers/of/device.c
index c5a9473a5fb1..d8d865223e51 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -165,6 +165,9 @@ int of_dma_configure_id(struct device *dev, struct 
device_node *np,
 
arch_setup_dma_ops(dev, dma_start, size, iommu, coherent);
 
+   if (!iommu)
+   return of_dma_set_restricted_buffer(dev);
+
return 0;
 }
 EXPORT_SYMBOL_GPL(of_dma_configure_id);
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index d717efbd637d..e9237f5eff48 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -163,12 +163,17 @@ struct bus_dma_region;
 #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA)
 int of_dma_get_range(struct device_node *np,
const struct bus_dma_region **map);
+int of_dma_set_restricted_buffer(struct device *dev);
 #else
 static inline int of_dma_get_range(struct device_node *np,
const struct bus_dma_region **map)
 {
return -ENODEV;
 }
+static inline int of_dma_get_restricted_buffer(struct device *dev)
+{
+   return -ENODEV;
+}
 #endif
 
 #endif /* _LINUX_OF_PRIVATE_H */
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 15/16] dt-bindings: of: Add restricted DMA pool

2021-04-22 Thread Claire Chang
Introduce the new compatible string, restricted-dma-pool, for restricted
DMA. One can specify the address and length of the restricted DMA memory
region by restricted-dma-pool in the reserved-memory node.

Signed-off-by: Claire Chang 
---
 .../reserved-memory/reserved-memory.txt   | 24 +++
 1 file changed, 24 insertions(+)

diff --git 
a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt 
b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
index e8d3096d922c..fc9a12c2f679 100644
--- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
@@ -51,6 +51,20 @@ compatible (optional) - standard definition
   used as a shared pool of DMA buffers for a set of devices. It can
   be used by an operating system to instantiate the necessary pool
   management subsystem if necessary.
+- restricted-dma-pool: This indicates a region of memory meant to be
+  used as a pool of restricted DMA buffers for a set of devices. The
+  memory region would be the only region accessible to those devices.
+  When using this, the no-map and reusable properties must not be set,
+  so the operating system can create a virtual mapping that will be 
used
+  for synchronization. The main purpose for restricted DMA is to
+  mitigate the lack of DMA access control on systems without an IOMMU,
+  which could result in the DMA accessing the system memory at
+  unexpected times and/or unexpected addresses, possibly leading to 
data
+  leakage or corruption. The feature on its own provides a basic level
+  of protection against the DMA overwriting buffer contents at
+  unexpected times. However, to protect against general data leakage 
and
+  system memory corruption, the system needs to provide way to lock 
down
+  the memory access, e.g., MPU.
 - vendor specific string in the form ,[-]
 no-map (optional) - empty property
 - Indicates the operating system must not create a virtual mapping
@@ -120,6 +134,11 @@ one for multimedia processing (named 
multimedia-memory@7700, 64MiB).
compatible = "acme,multimedia-memory";
reg = <0x7700 0x400>;
};
+
+   restricted_dma_mem_reserved: restricted_dma_mem_reserved {
+   compatible = "restricted-dma-pool";
+   reg = <0x5000 0x40>;
+   };
};
 
/* ... */
@@ -138,4 +157,9 @@ one for multimedia processing (named 
multimedia-memory@7700, 64MiB).
memory-region = <&multimedia_reserved>;
/* ... */
};
+
+   pcie_device: pcie_device@0,0 {
+   memory-region = <&restricted_dma_mem_reserved>;
+   /* ... */
+   };
 };
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 14/16] dma-direct: Allocate memory from restricted DMA pool if available

2021-04-22 Thread Claire Chang
The restricted DMA pool is preferred if available.

The restricted DMA pools provide a basic level of protection against the
DMA overwriting buffer contents at unexpected times. However, to protect
against general data leakage and system memory corruption, the system
needs to provide a way to lock down the memory access, e.g., MPU.

Signed-off-by: Claire Chang 
---
 kernel/dma/direct.c | 35 ++-
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 7a27f0510fcc..29523d2a9845 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -78,6 +78,10 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t 
phys, size_t size)
 static void __dma_direct_free_pages(struct device *dev, struct page *page,
size_t size)
 {
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+   if (swiotlb_free(dev, page, size))
+   return;
+#endif
dma_free_contiguous(dev, page, size);
 }
 
@@ -92,7 +96,17 @@ static struct page *__dma_direct_alloc_pages(struct device 
*dev, size_t size,
 
gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
   &phys_limit);
-   page = dma_alloc_contiguous(dev, size, gfp);
+
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+   page = swiotlb_alloc(dev, size);
+   if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
+   __dma_direct_free_pages(dev, page, size);
+   page = NULL;
+   }
+#endif
+
+   if (!page)
+   page = dma_alloc_contiguous(dev, size, gfp);
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
dma_free_contiguous(dev, page, size);
page = NULL;
@@ -148,7 +162,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
gfp |= __GFP_NOWARN;
 
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
-   !force_dma_unencrypted(dev)) {
+   !force_dma_unencrypted(dev) && !is_dev_swiotlb_force(dev)) {
page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO);
if (!page)
return NULL;
@@ -161,8 +175,8 @@ void *dma_direct_alloc(struct device *dev, size_t size,
}
 
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
-   !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
-   !dev_is_dma_coherent(dev))
+   !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) &&
+   !is_dev_swiotlb_force(dev))
return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
 
/*
@@ -172,7 +186,9 @@ void *dma_direct_alloc(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
!gfpflags_allow_blocking(gfp) &&
(force_dma_unencrypted(dev) ||
-(IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && 
!dev_is_dma_coherent(dev
+(IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ !dev_is_dma_coherent(dev))) &&
+   !is_dev_swiotlb_force(dev))
return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
 
/* we always manually zero the memory once we are done */
@@ -253,15 +269,15 @@ void dma_direct_free(struct device *dev, size_t size,
unsigned int page_order = get_order(size);
 
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
-   !force_dma_unencrypted(dev)) {
+   !force_dma_unencrypted(dev) && !is_dev_swiotlb_force(dev)) {
/* cpu_addr is a struct page cookie, not a kernel address */
dma_free_contiguous(dev, cpu_addr, size);
return;
}
 
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
-   !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
-   !dev_is_dma_coherent(dev)) {
+   !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) &&
+   !is_dev_swiotlb_force(dev)) {
arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
return;
}
@@ -289,7 +305,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, 
size_t size,
void *ret;
 
if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
-   force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp))
+   force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp) &&
+   !is_dev_swiotlb_force(dev))
return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
 
page = __dma_direct_alloc_pages(dev, size, gfp);
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 13/16] swiotlb: Add restricted DMA alloc/free support.

2021-04-22 Thread Claire Chang
Add the functions, swiotlb_{alloc,free} to support the memory allocation
from restricted DMA pool.

Signed-off-by: Claire Chang 
---
 include/linux/swiotlb.h |  4 
 kernel/dma/swiotlb.c| 35 +--
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 0c5a18d9cf89..e8cf49bd90c5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -134,6 +134,10 @@ unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
 bool is_swiotlb_active(struct device *dev);
 void __init swiotlb_adjust_size(unsigned long size);
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+struct page *swiotlb_alloc(struct device *dev, size_t size);
+bool swiotlb_free(struct device *dev, struct page *page, size_t size);
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
 static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index af0feb8eaead..274272c79080 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -454,8 +454,9 @@ static int find_slots(struct device *dev, phys_addr_t 
orig_addr,
 
index = wrap = wrap_index(mem, ALIGN(mem->index, stride));
do {
-   if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
-   (orig_addr & iotlb_align_mask)) {
+   if (orig_addr &&
+   (slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
+   (orig_addr & iotlb_align_mask)) {
index = wrap_index(mem, index + 1);
continue;
}
@@ -695,6 +696,36 @@ late_initcall(swiotlb_create_default_debugfs);
 #endif
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
+struct page *swiotlb_alloc(struct device *dev, size_t size)
+{
+   struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+   phys_addr_t tlb_addr;
+   int index;
+
+   if (!mem)
+   return NULL;
+
+   index = find_slots(dev, 0, size);
+   if (index == -1)
+   return NULL;
+
+   tlb_addr = slot_addr(mem->start, index);
+
+   return pfn_to_page(PFN_DOWN(tlb_addr));
+}
+
+bool swiotlb_free(struct device *dev, struct page *page, size_t size)
+{
+   phys_addr_t tlb_addr = page_to_phys(page);
+
+   if (!is_swiotlb_buffer(dev, tlb_addr))
+   return false;
+
+   release_slots(dev, tlb_addr);
+
+   return true;
+}
+
 static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
struct device *dev)
 {
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 12/16] dma-direct: Add a new wrapper __dma_direct_free_pages()

2021-04-22 Thread Claire Chang
Add a new wrapper __dma_direct_free_pages() that will be useful later
for swiotlb_free().

Signed-off-by: Claire Chang 
---
 kernel/dma/direct.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 7a88c34d0867..7a27f0510fcc 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -75,6 +75,12 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t 
phys, size_t size)
min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
 }
 
+static void __dma_direct_free_pages(struct device *dev, struct page *page,
+   size_t size)
+{
+   dma_free_contiguous(dev, page, size);
+}
+
 static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp_t gfp)
 {
@@ -237,7 +243,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
return NULL;
}
 out_free_pages:
-   dma_free_contiguous(dev, page, size);
+   __dma_direct_free_pages(dev, page, size);
return NULL;
 }
 
@@ -273,7 +279,7 @@ void dma_direct_free(struct device *dev, size_t size,
else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))
arch_dma_clear_uncached(cpu_addr, size);
 
-   dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size);
+   __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size);
 }
 
 struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
@@ -310,7 +316,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, 
size_t size,
*dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
return page;
 out_free_pages:
-   dma_free_contiguous(dev, page, size);
+   __dma_direct_free_pages(dev, page, size);
return NULL;
 }
 
@@ -329,7 +335,7 @@ void dma_direct_free_pages(struct device *dev, size_t size,
if (force_dma_unencrypted(dev))
set_memory_encrypted((unsigned long)vaddr, 1 << page_order);
 
-   dma_free_contiguous(dev, page, size);
+   __dma_direct_free_pages(dev, page, size);
 }
 
 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 11/16] swiotlb: Refactor swiotlb_tbl_unmap_single

2021-04-22 Thread Claire Chang
Add a new function, release_slots, to make the code reusable for supporting
different bounce buffer pools, e.g. restricted DMA pool.

Signed-off-by: Claire Chang 
---
 kernel/dma/swiotlb.c | 35 ---
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index b7d634d7a7eb..af0feb8eaead 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -547,27 +547,15 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
return tlb_addr;
 }
 
-/*
- * tlb_addr is the physical address of the bounce buffer to unmap.
- */
-void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
- size_t mapping_size, enum dma_data_direction dir,
- unsigned long attrs)
+static void release_slots(struct device *dev, phys_addr_t tlb_addr)
 {
-   struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
+   struct io_tlb_mem *mem = get_io_tlb_mem(dev);
unsigned long flags;
-   unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
+   unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
int nslots = nr_slots(mem->slots[index].alloc_size + offset);
int count, i;
 
-   /*
-* First, sync the memory before unmapping the entry
-*/
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-   swiotlb_bounce(hwdev, tlb_addr, mapping_size, DMA_FROM_DEVICE);
-
/*
 * Return the buffer to the free list by setting the corresponding
 * entries to indicate the number of contiguous entries available.
@@ -602,6 +590,23 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, 
phys_addr_t tlb_addr,
spin_unlock_irqrestore(&mem->lock, flags);
 }
 
+/*
+ * tlb_addr is the physical address of the bounce buffer to unmap.
+ */
+void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
+ size_t mapping_size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+   /*
+* First, sync the memory before unmapping the entry
+*/
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+   (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
+   swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+
+   release_slots(dev, tlb_addr);
+}
+
 void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir)
 {
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 10/16] swiotlb: Move alloc_size to find_slots

2021-04-22 Thread Claire Chang
Move the maintenance of alloc_size to find_slots for better code
reusability later.

Signed-off-by: Claire Chang 
---
 kernel/dma/swiotlb.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 96ff36d8ec53..b7d634d7a7eb 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -479,8 +479,11 @@ static int find_slots(struct device *dev, phys_addr_t 
orig_addr,
return -1;
 
 found:
-   for (i = index; i < index + nslots; i++)
+   for (i = index; i < index + nslots; i++) {
mem->slots[i].list = 0;
+   mem->slots[i].alloc_size =
+   alloc_size - ((i - index) << IO_TLB_SHIFT);
+   }
for (i = index - 1;
 io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
 mem->slots[i].list; i--)
@@ -535,11 +538,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
 * This is needed when we sync the memory.  Then we sync the buffer if
 * needed.
 */
-   for (i = 0; i < nr_slots(alloc_size + offset); i++) {
+   for (i = 0; i < nr_slots(alloc_size + offset); i++)
mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
-   mem->slots[index + i].alloc_size =
-   alloc_size - (i << IO_TLB_SHIFT);
-   }
tlb_addr = slot_addr(mem->start, index) + offset;
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 09/16] swiotlb: Bounce data from/to restricted DMA pool if available

2021-04-22 Thread Claire Chang
Regardless of swiotlb setting, the restricted DMA pool is preferred if
available.

The restricted DMA pools provide a basic level of protection against the
DMA overwriting buffer contents at unexpected times. However, to protect
against general data leakage and system memory corruption, the system
needs to provide a way to lock down the memory access, e.g., MPU.

Note that is_dev_swiotlb_force doesn't check if
swiotlb_force == SWIOTLB_FORCE. Otherwise the memory allocation behavior
with default swiotlb will be changed by the following patche
("dma-direct: Allocate memory from restricted DMA pool if available").

Signed-off-by: Claire Chang 
---
 include/linux/swiotlb.h | 13 +
 kernel/dma/direct.h |  3 ++-
 kernel/dma/swiotlb.c|  8 
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index c530c976d18b..0c5a18d9cf89 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -120,6 +120,15 @@ static inline bool is_swiotlb_buffer(struct device *dev, 
phys_addr_t paddr)
return mem && paddr >= mem->start && paddr < mem->end;
 }
 
+static inline bool is_dev_swiotlb_force(struct device *dev)
+{
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+   if (dev->dma_io_tlb_mem)
+   return true;
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
+   return false;
+}
+
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
@@ -131,6 +140,10 @@ static inline bool is_swiotlb_buffer(struct device *dev, 
phys_addr_t paddr)
 {
return false;
 }
+static inline bool is_dev_swiotlb_force(struct device *dev)
+{
+   return false;
+}
 static inline void swiotlb_exit(void)
 {
 }
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index 13e9e7158d94..f94813674e23 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -87,7 +87,8 @@ static inline dma_addr_t dma_direct_map_page(struct device 
*dev,
phys_addr_t phys = page_to_phys(page) + offset;
dma_addr_t dma_addr = phys_to_dma(dev, phys);
 
-   if (unlikely(swiotlb_force == SWIOTLB_FORCE))
+   if (unlikely(swiotlb_force == SWIOTLB_FORCE) ||
+   is_dev_swiotlb_force(dev))
return swiotlb_map(dev, phys, size, dir, attrs);
 
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 1d221343f1c8..96ff36d8ec53 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -344,7 +344,7 @@ void __init swiotlb_exit(void)
 static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t 
size,
   enum dma_data_direction dir)
 {
-   struct io_tlb_mem *mem = io_tlb_default_mem;
+   struct io_tlb_mem *mem = get_io_tlb_mem(dev);
int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
phys_addr_t orig_addr = mem->slots[index].orig_addr;
size_t alloc_size = mem->slots[index].alloc_size;
@@ -426,7 +426,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, 
unsigned int index)
 static int find_slots(struct device *dev, phys_addr_t orig_addr,
size_t alloc_size)
 {
-   struct io_tlb_mem *mem = io_tlb_default_mem;
+   struct io_tlb_mem *mem = get_io_tlb_mem(dev);
unsigned long boundary_mask = dma_get_seg_boundary(dev);
dma_addr_t tbl_dma_addr =
phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
@@ -503,7 +503,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
enum dma_data_direction dir, unsigned long attrs)
 {
-   struct io_tlb_mem *mem = io_tlb_default_mem;
+   struct io_tlb_mem *mem = get_io_tlb_mem(dev);
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
unsigned int i;
int index;
@@ -554,7 +554,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, 
phys_addr_t tlb_addr,
  size_t mapping_size, enum dma_data_direction dir,
  unsigned long attrs)
 {
-   struct io_tlb_mem *mem = io_tlb_default_mem;
+   struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
unsigned long flags;
unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 08/16] swiotlb: Update is_swiotlb_active to add a struct device argument

2021-04-22 Thread Claire Chang
Update is_swiotlb_active to add a struct device argument. This will be
useful later to allow for restricted DMA pool.

Signed-off-by: Claire Chang 
---
 drivers/gpu/drm/i915/gem/i915_gem_internal.c | 2 +-
 drivers/gpu/drm/nouveau/nouveau_ttm.c| 2 +-
 drivers/pci/xen-pcifront.c   | 2 +-
 include/linux/swiotlb.h  | 4 ++--
 kernel/dma/direct.c  | 2 +-
 kernel/dma/swiotlb.c | 4 ++--
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c 
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index ce6b664b10aa..7d48c433446b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -42,7 +42,7 @@ static int i915_gem_object_get_pages_internal(struct 
drm_i915_gem_object *obj)
 
max_order = MAX_ORDER;
 #ifdef CONFIG_SWIOTLB
-   if (is_swiotlb_active()) {
+   if (is_swiotlb_active(NULL)) {
unsigned int max_segment;
 
max_segment = swiotlb_max_segment();
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c 
b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index e8b506a6685b..2a2ae6d6cf6d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -321,7 +321,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
}
 
 #if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
-   need_swiotlb = is_swiotlb_active();
+   need_swiotlb = is_swiotlb_active(NULL);
 #endif
 
ret = ttm_device_init(&drm->ttm.bdev, &nouveau_bo_driver, drm->dev->dev,
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index b7a8f3a1921f..6d548ce53ce7 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -693,7 +693,7 @@ static int pcifront_connect_and_init_dma(struct 
pcifront_device *pdev)
 
spin_unlock(&pcifront_dev_lock);
 
-   if (!err && !is_swiotlb_active()) {
+   if (!err && !is_swiotlb_active(NULL)) {
err = pci_xen_swiotlb_init_late();
if (err)
dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n");
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 2a6cca07540b..c530c976d18b 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -123,7 +123,7 @@ static inline bool is_swiotlb_buffer(struct device *dev, 
phys_addr_t paddr)
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
-bool is_swiotlb_active(void);
+bool is_swiotlb_active(struct device *dev);
 void __init swiotlb_adjust_size(unsigned long size);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
@@ -143,7 +143,7 @@ static inline size_t swiotlb_max_mapping_size(struct device 
*dev)
return SIZE_MAX;
 }
 
-static inline bool is_swiotlb_active(void)
+static inline bool is_swiotlb_active(struct device *dev)
 {
return false;
 }
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 84c9feb5474a..7a88c34d0867 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -495,7 +495,7 @@ int dma_direct_supported(struct device *dev, u64 mask)
 size_t dma_direct_max_mapping_size(struct device *dev)
 {
/* If SWIOTLB is active, use its maximum mapping size */
-   if (is_swiotlb_active() &&
+   if (is_swiotlb_active(dev) &&
(dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
return swiotlb_max_mapping_size(dev);
return SIZE_MAX;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index ffbb8724e06c..1d221343f1c8 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -659,9 +659,9 @@ size_t swiotlb_max_mapping_size(struct device *dev)
return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
 }
 
-bool is_swiotlb_active(void)
+bool is_swiotlb_active(struct device *dev)
 {
-   return io_tlb_default_mem != NULL;
+   return get_io_tlb_mem(dev) != NULL;
 }
 EXPORT_SYMBOL_GPL(is_swiotlb_active);
 
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 07/16] swiotlb: Update is_swiotlb_buffer to add a struct device argument

2021-04-22 Thread Claire Chang
Update is_swiotlb_buffer to add a struct device argument. This will be
useful later to allow for restricted DMA pool.

Signed-off-by: Claire Chang 
---
 drivers/iommu/dma-iommu.c | 12 ++--
 drivers/xen/swiotlb-xen.c |  2 +-
 include/linux/swiotlb.h   |  6 +++---
 kernel/dma/direct.c   |  6 +++---
 kernel/dma/direct.h   |  6 +++---
 5 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 7bcdd1205535..a5df35bfd150 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -504,7 +504,7 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, 
dma_addr_t dma_addr,
 
__iommu_dma_unmap(dev, dma_addr, size);
 
-   if (unlikely(is_swiotlb_buffer(phys)))
+   if (unlikely(is_swiotlb_buffer(dev, phys)))
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 }
 
@@ -575,7 +575,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
}
 
iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-   if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
+   if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
return iova;
 }
@@ -781,7 +781,7 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(phys, size, dir);
 
-   if (is_swiotlb_buffer(phys))
+   if (is_swiotlb_buffer(dev, phys))
swiotlb_sync_single_for_cpu(dev, phys, size, dir);
 }
 
@@ -794,7 +794,7 @@ static void iommu_dma_sync_single_for_device(struct device 
*dev,
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-   if (is_swiotlb_buffer(phys))
+   if (is_swiotlb_buffer(dev, phys))
swiotlb_sync_single_for_device(dev, phys, size, dir);
 
if (!dev_is_dma_coherent(dev))
@@ -815,7 +815,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
 
-   if (is_swiotlb_buffer(sg_phys(sg)))
+   if (is_swiotlb_buffer(dev, sg_phys(sg)))
swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
sg->length, dir);
}
@@ -832,7 +832,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
return;
 
for_each_sg(sgl, sg, nelems, i) {
-   if (is_swiotlb_buffer(sg_phys(sg)))
+   if (is_swiotlb_buffer(dev, sg_phys(sg)))
swiotlb_sync_single_for_device(dev, sg_phys(sg),
   sg->length, dir);
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 4c89afc0df62..0c6ed09f8513 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -100,7 +100,7 @@ static int is_xen_swiotlb_buffer(struct device *dev, 
dma_addr_t dma_addr)
 * in our domain. Therefore _only_ check address within our domain.
 */
if (pfn_valid(PFN_DOWN(paddr)))
-   return is_swiotlb_buffer(paddr);
+   return is_swiotlb_buffer(dev, paddr);
return 0;
 }
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b469f04cca26..2a6cca07540b 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -113,9 +113,9 @@ static inline struct io_tlb_mem *get_io_tlb_mem(struct 
device *dev)
return io_tlb_default_mem;
 }
 
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
-   struct io_tlb_mem *mem = io_tlb_default_mem;
+   struct io_tlb_mem *mem = get_io_tlb_mem(dev);
 
return mem && paddr >= mem->start && paddr < mem->end;
 }
@@ -127,7 +127,7 @@ bool is_swiotlb_active(void);
 void __init swiotlb_adjust_size(unsigned long size);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
return false;
 }
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index f737e3347059..84c9feb5474a 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -343,7 +343,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
for_each_sg(sgl, sg, nents, i) {
phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
 
-   if (unlikely(is_swiotlb_buffer(paddr)))
+   if (unlikely(is_swiotlb_buffer(dev, paddr)))
swiotlb_sync_single_for_device(dev, paddr, sg->length,
   dir);
 
@@ -369,7 +369,7 @@ void dma_direct_sync_sg_for_cpu(struct device 

[PATCH v5 05/16] swiotlb: Add restricted DMA pool initialization

2021-04-22 Thread Claire Chang
Add the initialization function to create restricted DMA pools from
matching reserved-memory nodes.

Signed-off-by: Claire Chang 
---
 include/linux/device.h  |  4 +++
 include/linux/swiotlb.h |  3 +-
 kernel/dma/swiotlb.c| 80 +
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index 38a2071cf776..4987608ea4ff 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -416,6 +416,7 @@ struct dev_links_info {
  * @dma_pools: Dma pools (if dma'ble device).
  * @dma_mem:   Internal for coherent mem override.
  * @cma_area:  Contiguous memory area for dma allocations
+ * @dma_io_tlb_mem: Internal for swiotlb io_tlb_mem override.
  * @archdata:  For arch-specific additions.
  * @of_node:   Associated device tree node.
  * @fwnode:Associated device node supplied by platform firmware.
@@ -521,6 +522,9 @@ struct device {
 #ifdef CONFIG_DMA_CMA
struct cma *cma_area;   /* contiguous memory area for dma
   allocations */
+#endif
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+   struct io_tlb_mem *dma_io_tlb_mem;
 #endif
/* arch specific additions */
struct dev_archdata archdata;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 216854a5e513..03ad6e3b4056 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -72,7 +72,8 @@ extern enum swiotlb_force swiotlb_force;
  * range check to see if the memory was in fact allocated by this
  * API.
  * @nslabs:The number of IO TLB blocks (in groups of 64) between @start and
- * @end. This is command line adjustable via setup_io_tlb_npages.
+ * @end. For default swiotlb, this is command line adjustable via
+ * setup_io_tlb_npages.
  * @used:  The number of used IO TLB block.
  * @list:  The free list describing the number of free entries available
  * from each index.
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 57a9adb920bf..ffbb8724e06c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -39,6 +39,13 @@
 #ifdef CONFIG_DEBUG_FS
 #include 
 #endif
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+#include 
+#include 
+#include 
+#include 
+#include 
+#endif
 
 #include 
 #include 
@@ -681,3 +688,76 @@ static int __init swiotlb_create_default_debugfs(void)
 late_initcall(swiotlb_create_default_debugfs);
 
 #endif
+
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
+   struct device *dev)
+{
+   struct io_tlb_mem *mem = rmem->priv;
+   unsigned long nslabs = rmem->size >> IO_TLB_SHIFT;
+
+   if (dev->dma_io_tlb_mem)
+   return 0;
+
+   /* Since multiple devices can share the same pool, the private data,
+* io_tlb_mem struct, will be initialized by the first device attached
+* to it.
+*/
+   if (!mem) {
+   mem = kzalloc(struct_size(mem, slots, nslabs), GFP_KERNEL);
+   if (!mem)
+   return -ENOMEM;
+#ifdef CONFIG_ARM
+   if (!PageHighMem(pfn_to_page(PHYS_PFN(rmem->base {
+   kfree(mem);
+   return -EINVAL;
+   }
+#endif /* CONFIG_ARM */
+   swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false);
+
+   rmem->priv = mem;
+   }
+
+#ifdef CONFIG_DEBUG_FS
+   if (!io_tlb_default_mem->debugfs)
+   io_tlb_default_mem->debugfs =
+   debugfs_create_dir("swiotlb", NULL);
+
+   swiotlb_create_debugfs(mem, rmem->name, io_tlb_default_mem->debugfs);
+#endif /* CONFIG_DEBUG_FS */
+
+   dev->dma_io_tlb_mem = mem;
+
+   return 0;
+}
+
+static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
+   struct device *dev)
+{
+   if (dev)
+   dev->dma_io_tlb_mem = NULL;
+}
+
+static const struct reserved_mem_ops rmem_swiotlb_ops = {
+   .device_init = rmem_swiotlb_device_init,
+   .device_release = rmem_swiotlb_device_release,
+};
+
+static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
+{
+   unsigned long node = rmem->fdt_node;
+
+   if (of_get_flat_dt_prop(node, "reusable", NULL) ||
+   of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
+   of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
+   of_get_flat_dt_prop(node, "no-map", NULL))
+   return -EINVAL;
+
+   rmem->ops = &rmem_swiotlb_ops;
+   pr_info("Reserved memory: created device swiotlb memory pool at %pa, 
size %ld MiB\n",
+   &rmem->base, (unsigned long)rmem->size / SZ_1M);
+   return 0;
+}
+
+RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
-- 
2.31.1.368.gbe11c130af-goog

___

[PATCH v5 04/16] swiotlb: Add DMA_RESTRICTED_POOL

2021-04-22 Thread Claire Chang
Add a new kconfig symbol, DMA_RESTRICTED_POOL, for restricted DMA pool.

Signed-off-by: Claire Chang 
---
 kernel/dma/Kconfig | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 77b405508743..3e961dc39634 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -80,6 +80,20 @@ config SWIOTLB
bool
select NEED_DMA_MAP_STATE
 
+config DMA_RESTRICTED_POOL
+   bool "DMA Restricted Pool"
+   depends on OF && OF_RESERVED_MEM
+   select SWIOTLB
+   help
+ This enables support for restricted DMA pools which provide a level of
+ DMA memory protection on systems with limited hardware protection
+ capabilities, such as those lacking an IOMMU.
+
+ For more information see
+ 

+ and .
+ If unsure, say "n".
+
 #
 # Should be selected if we can mmap non-coherent mappings to userspace.
 # The only thing that is really required is a way to set an uncached bit
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 06/16] swiotlb: Add a new get_io_tlb_mem getter

2021-04-22 Thread Claire Chang
Add a new getter, get_io_tlb_mem, to help select the io_tlb_mem struct.
The restricted DMA pool is preferred if available.

Signed-off-by: Claire Chang 
---
 include/linux/swiotlb.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 03ad6e3b4056..b469f04cca26 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include 
 #include 
 #include 
 #include 
@@ -102,6 +103,16 @@ struct io_tlb_mem {
 };
 extern struct io_tlb_mem *io_tlb_default_mem;
 
+static inline struct io_tlb_mem *get_io_tlb_mem(struct device *dev)
+{
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+   if (dev && dev->dma_io_tlb_mem)
+   return dev->dma_io_tlb_mem;
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
+
+   return io_tlb_default_mem;
+}
+
 static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 {
struct io_tlb_mem *mem = io_tlb_default_mem;
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 03/16] swiotlb: Refactor swiotlb_create_debugfs

2021-04-22 Thread Claire Chang
Split the debugfs creation to make the code reusable for supporting
different bounce buffer pools, e.g. restricted DMA pool.

Signed-off-by: Claire Chang 
---
 kernel/dma/swiotlb.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 3f1adee35097..57a9adb920bf 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -660,18 +660,24 @@ EXPORT_SYMBOL_GPL(is_swiotlb_active);
 
 #ifdef CONFIG_DEBUG_FS
 
-static int __init swiotlb_create_debugfs(void)
+static void swiotlb_create_debugfs(struct io_tlb_mem *mem, const char *name,
+  struct dentry *node)
 {
-   struct io_tlb_mem *mem = io_tlb_default_mem;
-
if (!mem)
-   return 0;
-   mem->debugfs = debugfs_create_dir("swiotlb", NULL);
+   return;
+
+   mem->debugfs = debugfs_create_dir(name, node);
debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
+}
+
+static int __init swiotlb_create_default_debugfs(void)
+{
+   swiotlb_create_debugfs(io_tlb_default_mem, "swiotlb", NULL);
+
return 0;
 }
 
-late_initcall(swiotlb_create_debugfs);
+late_initcall(swiotlb_create_default_debugfs);
 
 #endif
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 02/16] swiotlb: Refactor swiotlb init functions

2021-04-22 Thread Claire Chang
Add a new function, swiotlb_init_io_tlb_mem, for the io_tlb_mem struct
initialization to make the code reusable.

Note that we now also call set_memory_decrypted in swiotlb_init_with_tbl.

Signed-off-by: Claire Chang 
---
 kernel/dma/swiotlb.c | 51 ++--
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8635a57f88e9..3f1adee35097 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -166,9 +166,30 @@ void __init swiotlb_update_mem_attributes(void)
memset(vaddr, 0, bytes);
 }
 
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
+   unsigned long nslabs, bool late_alloc)
 {
+   void *vaddr = phys_to_virt(start);
unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
+
+   mem->nslabs = nslabs;
+   mem->start = start;
+   mem->end = mem->start + bytes;
+   mem->index = 0;
+   mem->late_alloc = late_alloc;
+   spin_lock_init(&mem->lock);
+   for (i = 0; i < mem->nslabs; i++) {
+   mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
+   mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
+   mem->slots[i].alloc_size = 0;
+   }
+
+   set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+   memset(vaddr, 0, bytes);
+}
+
+int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+{
struct io_tlb_mem *mem;
size_t alloc_size;
 
@@ -184,16 +205,8 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long 
nslabs, int verbose)
if (!mem)
panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
  __func__, alloc_size, PAGE_SIZE);
-   mem->nslabs = nslabs;
-   mem->start = __pa(tlb);
-   mem->end = mem->start + bytes;
-   mem->index = 0;
-   spin_lock_init(&mem->lock);
-   for (i = 0; i < mem->nslabs; i++) {
-   mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
-   mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
-   mem->slots[i].alloc_size = 0;
-   }
+
+   swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
 
io_tlb_default_mem = mem;
if (verbose)
@@ -280,7 +293,6 @@ swiotlb_late_init_with_default_size(size_t default_size)
 int
 swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 {
-   unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
struct io_tlb_mem *mem;
 
if (swiotlb_force == SWIOTLB_NO_FORCE)
@@ -295,20 +307,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
if (!mem)
return -ENOMEM;
 
-   mem->nslabs = nslabs;
-   mem->start = virt_to_phys(tlb);
-   mem->end = mem->start + bytes;
-   mem->index = 0;
-   mem->late_alloc = 1;
-   spin_lock_init(&mem->lock);
-   for (i = 0; i < mem->nslabs; i++) {
-   mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
-   mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
-   mem->slots[i].alloc_size = 0;
-   }
-
-   set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
-   memset(tlb, 0, bytes);
+   swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
 
io_tlb_default_mem = mem;
swiotlb_print_info();
-- 

I'm not 100% sure if set_memory_decrypted is safe to call in
swiotlb_init_with_tbl, but I didn't hit any issue booting with this.

2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 00/16] Restricted DMA

2021-04-22 Thread Claire Chang
This series implements mitigations for lack of DMA access control on
systems without an IOMMU, which could result in the DMA accessing the
system memory at unexpected times and/or unexpected addresses, possibly
leading to data leakage or corruption.

For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is
not behind an IOMMU. As PCI-e, by design, gives the device full access to
system memory, a vulnerability in the Wi-Fi firmware could easily escalate
to a full system exploit (remote wifi exploits: [1a], [1b] that shows a
full chain of exploits; [2], [3]).

To mitigate the security concerns, we introduce restricted DMA. Restricted
DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a
specially allocated region and does memory allocation from the same region.
The feature on its own provides a basic level of protection against the DMA
overwriting buffer contents at unexpected times. However, to protect
against general data leakage and system memory corruption, the system needs
to provide a way to restrict the DMA to a predefined memory region (this is
usually done at firmware level, e.g. MPU in ATF on some ARM platforms [4]).

[1a] 
https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html
[1b] 
https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html
[2] https://blade.tencent.com/en/advisories/qualpwn/
[3] 
https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/
[4] 
https://github.com/ARM-software/arm-trusted-firmware/blob/master/plat/mediatek/mt8183/drivers/emi_mpu/emi_mpu.c#L132

v5:
  Rebase on latest linux-next

v4:
  - Fix spinlock bad magic
  - Use rmem->name for debugfs entry
  - Address the comments in v3

v3:
  Using only one reserved memory region for both streaming DMA and memory
  allocation.
  https://lore.kernel.org/patchwork/cover/1360992/

v2:
  Building on top of swiotlb.
  https://lore.kernel.org/patchwork/cover/1280705/

v1:
  Using dma_map_ops.
  https://lore.kernel.org/patchwork/cover/1271660/

Claire Chang (16):
  swiotlb: Fix the type of index
  swiotlb: Refactor swiotlb init functions
  swiotlb: Refactor swiotlb_create_debugfs
  swiotlb: Add DMA_RESTRICTED_POOL
  swiotlb: Add restricted DMA pool initialization
  swiotlb: Add a new get_io_tlb_mem getter
  swiotlb: Update is_swiotlb_buffer to add a struct device argument
  swiotlb: Update is_swiotlb_active to add a struct device argument
  swiotlb: Bounce data from/to restricted DMA pool if available
  swiotlb: Move alloc_size to find_slots
  swiotlb: Refactor swiotlb_tbl_unmap_single
  dma-direct: Add a new wrapper __dma_direct_free_pages()
  swiotlb: Add restricted DMA alloc/free support.
  dma-direct: Allocate memory from restricted DMA pool if available
  dt-bindings: of: Add restricted DMA pool
  of: Add plumbing for restricted DMA pool

 .../reserved-memory/reserved-memory.txt   |  24 ++
 drivers/gpu/drm/i915/gem/i915_gem_internal.c  |   2 +-
 drivers/gpu/drm/nouveau/nouveau_ttm.c |   2 +-
 drivers/iommu/dma-iommu.c |  12 +-
 drivers/of/address.c  |  25 ++
 drivers/of/device.c   |   3 +
 drivers/of/of_private.h   |   5 +
 drivers/pci/xen-pcifront.c|   2 +-
 drivers/xen/swiotlb-xen.c |   2 +-
 include/linux/device.h|   4 +
 include/linux/swiotlb.h   |  41 ++-
 kernel/dma/Kconfig|  14 +
 kernel/dma/direct.c   |  57 +++--
 kernel/dma/direct.h   |   9 +-
 kernel/dma/swiotlb.c  | 242 +-
 15 files changed, 347 insertions(+), 97 deletions(-)

-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 01/16] swiotlb: Fix the type of index

2021-04-22 Thread Claire Chang
Fix the type of index from unsigned int to int since find_slots() might
return -1.

Fixes: 0774983bc923 ("swiotlb: refactor swiotlb_tbl_map_single")
Signed-off-by: Claire Chang 
---
 kernel/dma/swiotlb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 0a5b6f7e75bc..8635a57f88e9 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -499,7 +499,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
 {
struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
-   unsigned int index, i;
+   unsigned int i;
+   int index;
phys_addr_t tlb_addr;
 
if (!mem)
-- 
2.31.1.368.gbe11c130af-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/3] iommu: rockchip: Add support iommu v2

2021-04-22 Thread Benjamin Gaignard
From: Simon Xue 

RK356x SoC got new IOMMU hardware block (version 2).
Add a compatible to distinguish it from the first version.

Signed-off-by: Simon Xue 
[Benjamin]
- port driver from kernel 4.19 to 5.12
- change functions prototype.
- squash all fixes in this commit.
Signed-off-by: Benjamin Gaignard 
---
 drivers/iommu/rockchip-iommu.c | 422 +++--
 1 file changed, 406 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index e5d86b7177de..0d345d0f44fc 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -81,6 +82,30 @@
   */
 #define RK_IOMMU_PGSIZE_BITMAP 0x007ff000
 
+#define DT_LO_MASK 0xf000
+#define DT_HI_MASK GENMASK_ULL(39, 32)
+#define DT_SHIFT   28
+
+#define DTE_BASE_HI_MASK GENMASK(11, 4)
+
+#define PAGE_DESC_LO_MASK   0xf000
+#define PAGE_DESC_HI1_LOWER 32
+#define PAGE_DESC_HI1_UPPER 35
+#define PAGE_DESC_HI2_LOWER 36
+#define PAGE_DESC_HI2_UPPER 39
+#define PAGE_DESC_HI_MASK1  GENMASK_ULL(PAGE_DESC_HI1_UPPER, 
PAGE_DESC_HI1_LOWER)
+#define PAGE_DESC_HI_MASK2  GENMASK_ULL(PAGE_DESC_HI2_UPPER, 
PAGE_DESC_HI2_LOWER)
+
+#define DTE_HI1_LOWER 8
+#define DTE_HI1_UPPER 11
+#define DTE_HI2_LOWER 4
+#define DTE_HI2_UPPER 7
+#define DTE_HI_MASK1  GENMASK(DTE_HI1_UPPER, DTE_HI1_LOWER)
+#define DTE_HI_MASK2  GENMASK(DTE_HI2_UPPER, DTE_HI2_LOWER)
+
+#define PAGE_DESC_HI_SHIFT1 (PAGE_DESC_HI1_LOWER - DTE_HI1_LOWER)
+#define PAGE_DESC_HI_SHIFT2 (PAGE_DESC_HI2_LOWER - DTE_HI2_LOWER)
+
 struct rk_iommu_domain {
struct list_head iommus;
u32 *dt; /* page directory table */
@@ -91,6 +116,10 @@ struct rk_iommu_domain {
struct iommu_domain domain;
 };
 
+struct rockchip_iommu_data {
+   u32 version;
+};
+
 /* list of clocks required by IOMMU */
 static const char * const rk_iommu_clocks[] = {
"aclk", "iface",
@@ -108,6 +137,7 @@ struct rk_iommu {
struct list_head node; /* entry in rk_iommu_domain.iommus */
struct iommu_domain *domain; /* domain to which iommu is attached */
struct iommu_group *group;
+   u32 version;
 };
 
 struct rk_iommudata {
@@ -174,11 +204,32 @@ static struct rk_iommu_domain *to_rk_domain(struct 
iommu_domain *dom)
 #define RK_DTE_PT_ADDRESS_MASK0xf000
 #define RK_DTE_PT_VALID   BIT(0)
 
+/*
+ * In v2:
+ * 31:12 - PT address bit 31:0
+ * 11: 8 - PT address bit 35:32
+ *  7: 4 - PT address bit 39:36
+ *  3: 1 - Reserved
+ * 0 - 1 if PT @ PT address is valid
+ */
+#define RK_DTE_PT_ADDRESS_MASK_V2 0xfff0
+
 static inline phys_addr_t rk_dte_pt_address(u32 dte)
 {
return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK;
 }
 
+static inline phys_addr_t rk_dte_pt_address_v2(u32 dte)
+{
+   u64 dte_v2 = dte;
+
+   dte_v2 = ((dte_v2 & DTE_HI_MASK2) << PAGE_DESC_HI_SHIFT2) |
+((dte_v2 & DTE_HI_MASK1) << PAGE_DESC_HI_SHIFT1) |
+(dte_v2 & PAGE_DESC_LO_MASK);
+
+   return (phys_addr_t)dte_v2;
+}
+
 static inline bool rk_dte_is_pt_valid(u32 dte)
 {
return dte & RK_DTE_PT_VALID;
@@ -189,6 +240,15 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma)
return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
 }
 
+static inline u32 rk_mk_dte_v2(dma_addr_t pt_dma)
+{
+   pt_dma = (pt_dma & PAGE_DESC_LO_MASK) |
+((pt_dma & PAGE_DESC_HI_MASK1) >> PAGE_DESC_HI_SHIFT1) |
+(pt_dma & PAGE_DESC_HI_MASK2) >> PAGE_DESC_HI_SHIFT2;
+
+   return (pt_dma & RK_DTE_PT_ADDRESS_MASK_V2) | RK_DTE_PT_VALID;
+}
+
 /*
  * Each PTE has a Page address, some flags and a valid bit:
  * +-+---+---+-+
@@ -215,11 +275,37 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma)
 #define RK_PTE_PAGE_READABLE  BIT(1)
 #define RK_PTE_PAGE_VALID BIT(0)
 
+/*
+ * In v2:
+ * 31:12 - Page address bit 31:0
+ *  11:9 - Page address bit 34:32
+ *   8:4 - Page address bit 39:35
+ * 3 - Security
+ * 2 - Readable
+ * 1 - Writable
+ * 0 - 1 if Page @ Page address is valid
+ */
+#define RK_PTE_PAGE_ADDRESS_MASK_V2  0xfff0
+#define RK_PTE_PAGE_FLAGS_MASK_V20x000e
+#define RK_PTE_PAGE_READABLE_V2  BIT(2)
+#define RK_PTE_PAGE_WRITABLE_V2  BIT(1)
+
 static inline phys_addr_t rk_pte_page_address(u32 pte)
 {
return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK;
 }
 
+static inline phys_addr_t rk_pte_page_address_v2(u32 pte)
+{
+   u64 pte_v2 = pte;
+
+   pte_v2 = ((pte_v2 & DTE_HI_MASK2) << PAGE_DESC_HI_SHIFT2) |
+((pte_v2 & DTE_HI_MASK1) << PAGE_DESC_HI_SHIFT1) |
+(pte_v2 & PAGE_DESC_LO_MASK);
+
+   return (phys_addr_t)pte_v2;
+}
+
 static inline bool rk_pte_is_page_valid(u32 pte)
 {
return pte & RK_PTE_PAGE_VALID;
@@ -229,12 +315,26 @@ static inline bool rk_pte_is_page_valid(u32 pte)
 static u32 rk_mk_pte(phys_addr_t page, int prot)

[PATCH 1/3] dt-bindings: iommu: rockchip: Convert IOMMU to DT schema

2021-04-22 Thread Benjamin Gaignard
Convert Rockchip IOMMU to DT schema

Signed-off-by: Benjamin Gaignard 
---
 .../bindings/iommu/rockchip,iommu.txt | 38 --
 .../bindings/iommu/rockchip,iommu.yaml| 76 +++
 2 files changed, 76 insertions(+), 38 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
 create mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml

diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt 
b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
deleted file mode 100644
index 6ecefea1c6f9..
--- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-Rockchip IOMMU
-==
-
-A Rockchip DRM iommu translates io virtual addresses to physical addresses for
-its master device.  Each slave device is bound to a single master device, and
-shares its clocks, power domain and irq.
-
-Required properties:
-- compatible  : Should be "rockchip,iommu"
-- reg : Address space for the configuration registers
-- interrupts  : Interrupt specifier for the IOMMU instance
-- interrupt-names : Interrupt name for the IOMMU instance
-- #iommu-cells: Should be <0>.  This indicates the iommu is a
-"single-master" device, and needs no additional information
-to associate with its master device.  See:
-Documentation/devicetree/bindings/iommu/iommu.txt
-- clocks  : A list of clocks required for the IOMMU to be accessible by
-the host CPU.
-- clock-names : Should contain the following:
-   "iface" - Main peripheral bus clock (PCLK/HCL) (required)
-   "aclk"  - AXI bus clock (required)
-
-Optional properties:
-- rockchip,disable-mmu-reset : Don't use the mmu reset operation.
-  Some mmu instances may produce unexpected results
-  when the reset operation is used.
-
-Example:
-
-   vopl_mmu: iommu@ff940300 {
-   compatible = "rockchip,iommu";
-   reg = <0xff940300 0x100>;
-   interrupts = ;
-   interrupt-names = "vopl_mmu";
-   clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
-   clock-names = "aclk", "iface";
-   #iommu-cells = <0>;
-   };
diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml 
b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
new file mode 100644
index ..ab128f8e4c73
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/rockchip,iommu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip IOMMU
+
+maintainers:
+  - Simon Xue 
+
+description: |+
+  A Rockchip DRM iommu translates io virtual addresses to physical addresses 
for
+  its master device. Each slave device is bound to a single master device, and
+  shares its clocks, power domain and irq.
+
+  For information on assigning IOMMU controller to its peripheral devices,
+  see generic IOMMU bindings.
+
+properties:
+  compatible:
+const: rockchip,iommu
+
+  reg:
+maxItems: 1
+
+  interrupts:
+maxItems: 1
+
+  interrupt-names:
+maxItems: 1
+
+  clocks:
+items:
+  - description: Core clock
+  - description: Interface clock
+
+  clock-names:
+items:
+  - const: aclk
+  - const: iface
+
+  "#iommu-cells":
+const: 0
+
+  rockchip,disable-mmu-reset:
+$ref: /schemas/types.yaml#/definitions/flag
+description: |
+  Don't use the mmu reset operation.
+  Some mmu instances may produce unexpected results
+  when the reset operation is used.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - "#iommu-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+#include 
+#include 
+
+vopl_mmu: iommu@ff940300 {
+  compatible = "rockchip,iommu";
+  reg = <0xff940300 0x100>;
+  interrupts = ;
+  interrupt-names = "vopl_mmu";
+  clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
+  clock-names = "aclk", "iface";
+  #iommu-cells = <0>;
+};
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/3] IOMMU: Add driver for rk356x

2021-04-22 Thread Benjamin Gaignard
This series adds the IOMMU driver for rk356x SoC.
Since a new compatible is needed to distinguish this second version of 
IOMMU hardware block from the first one, it is an opportunity to convert
the binding to DT schema.

Benjamin

Benjamin Gaignard (2):
  dt-bindings: iommu: rockchip: Convert IOMMU to DT schema
  dt-bindings: iommu: rockchip: Add compatible for v2

Simon Xue (1):
  iommu: rockchip: Add support iommu v2

 .../bindings/iommu/rockchip,iommu.txt |  38 --
 .../bindings/iommu/rockchip,iommu.yaml|  78 
 drivers/iommu/rockchip-iommu.c| 422 +-
 3 files changed, 484 insertions(+), 54 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
 create mode 100644 Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml

-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/3] dt-bindings: iommu: rockchip: Add compatible for v2

2021-04-22 Thread Benjamin Gaignard
Add compatible for the second version of IOMMU hardware block.

Signed-off-by: Benjamin Gaignard 
---
 Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml 
b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
index ab128f8e4c73..76710dcdc3e4 100644
--- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
@@ -19,7 +19,9 @@ description: |+
 
 properties:
   compatible:
-const: rockchip,iommu
+enum:
+  - rockchip,iommu
+  - rockchip,iommu-v2
 
   reg:
 maxItems: 1
-- 
2.25.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 5/7] MIPS/octeon: simplify swiotlb initialization

2021-04-22 Thread Christoph Hellwig
Just use swiotlb_adjust_size and swiotlb_init to initialize
swiotlb instead of doing a lot of manual work.

Signed-off-by: Christoph Hellwig 
---
 arch/mips/cavium-octeon/dma-octeon.c  | 16 ++--
 arch/mips/include/asm/octeon/pci-octeon.h |  1 -
 arch/mips/pci/pci-octeon.c|  2 +-
 3 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/arch/mips/cavium-octeon/dma-octeon.c 
b/arch/mips/cavium-octeon/dma-octeon.c
index 020b8ce5b8ff7c..6bc9ef5e3790ec 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -186,15 +186,12 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t 
daddr)
return daddr;
 }
 
-char *octeon_swiotlb;
-
 void __init plat_swiotlb_setup(void)
 {
phys_addr_t start, end;
phys_addr_t max_addr;
phys_addr_t addr_size;
size_t swiotlbsize;
-   unsigned long swiotlb_nslabs;
u64 i;
 
max_addr = 0;
@@ -236,15 +233,6 @@ void __init plat_swiotlb_setup(void)
if (OCTEON_IS_OCTEON2() && max_addr >= 0x1ul)
swiotlbsize = 64 * (1<<20);
 #endif
-   swiotlb_nslabs = swiotlbsize >> IO_TLB_SHIFT;
-   swiotlb_nslabs = ALIGN(swiotlb_nslabs, IO_TLB_SEGSIZE);
-   swiotlbsize = swiotlb_nslabs << IO_TLB_SHIFT;
-
-   octeon_swiotlb = memblock_alloc_low(swiotlbsize, PAGE_SIZE);
-   if (!octeon_swiotlb)
-   panic("%s: Failed to allocate %zu bytes align=%lx\n",
- __func__, swiotlbsize, PAGE_SIZE);
-
-   if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlbsize, 1) == -ENOMEM)
-   panic("Cannot allocate SWIOTLB buffer");
+   swiotlb_adjust_size(swiotlbsize);
+   swiotlb_init(false);
 }
diff --git a/arch/mips/include/asm/octeon/pci-octeon.h 
b/arch/mips/include/asm/octeon/pci-octeon.h
index b12d9a3fbfb6c0..a2f20a44fb6143 100644
--- a/arch/mips/include/asm/octeon/pci-octeon.h
+++ b/arch/mips/include/asm/octeon/pci-octeon.h
@@ -64,6 +64,5 @@ enum octeon_dma_bar_type {
 extern enum octeon_dma_bar_type octeon_dma_bar_type;
 
 void octeon_pci_dma_init(void);
-extern char *octeon_swiotlb;
 
 #endif
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index fc29b85cfa926d..ff26cd9dc083f6 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -664,7 +664,7 @@ static int __init octeon_pci_setup(void)
 
/* BAR1 movable regions contiguous to cover the swiotlb */
octeon_bar1_pci_phys =
-   virt_to_phys(octeon_swiotlb) & ~((1ull << 22) - 1);
+   io_tlb_default_mem->start & ~((1ull << 22) - 1);
 
for (index = 0; index < 32; index++) {
union cvmx_pci_bar1_indexx bar1_index;
-- 
2.30.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/7] swiotlb: use swiotlb_size_or_default in swiotlb_init

2021-04-22 Thread Christoph Hellwig
Use swiotlb_size_or_default to calculate the buffer size insted of
open coding it.

Signed-off-by: Christoph Hellwig 
---
 kernel/dma/swiotlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index c7b3dd86db7f56..27461fd63e0330 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -209,7 +209,7 @@ int __init swiotlb_init_with_tbl(char *tlb, size_t bytes, 
int verbose)
 void  __init
 swiotlb_init(int verbose)
 {
-   size_t bytes = default_nslabs << IO_TLB_SHIFT;
+   size_t bytes = swiotlb_size_or_default();
void *tlb;
 
if (swiotlb_force == SWIOTLB_NO_FORCE)
-- 
2.30.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 7/7] swiotlb: don't override the command line in swiotlb_adjust_size

2021-04-22 Thread Christoph Hellwig
When the user specified an explicit swiotlb size on the command line,
the achitecture code should not override it.

Fixes: 2cbc2776efe4 ("swiotlb: remove swiotlb_nr_tbl")
Reported-by: Tom Lendacky 
Signed-off-by: Christoph Hellwig 
---
 kernel/dma/swiotlb.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 87d06ddf4753f3..aef02a3825b494 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -106,7 +106,9 @@ void swiotlb_set_max_segment(unsigned int val)
 
 unsigned long swiotlb_size_or_default(void)
 {
-   return swiotlb_default_size;
+   if (swiotlb_default_size)
+   return swiotlb_default_size;
+   return IO_TLB_DEFAULT_SIZE;
 }
 
 void __init swiotlb_adjust_size(unsigned long size)
@@ -116,6 +118,8 @@ void __init swiotlb_adjust_size(unsigned long size)
 * architectures such as those supporting memory encryption to
 * adjust/expand SWIOTLB size for their use.
 */
+   if (swiotlb_default_size)
+   return;
swiotlb_default_size = ALIGN(size, IO_TLB_SEGSIZE << IO_TLB_SHIFT);
pr_info("SWIOTLB bounce buffer size adjusted to %luMB",
swiotlb_default_size >> 20);
-- 
2.30.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 6/7] swiotlb: replace default_nslabs with a byte value

2021-04-22 Thread Christoph Hellwig
Replace the default_nslabs variable with one that stores the size in
bytes as that is what all the users actually expect.

Signed-off-by: Christoph Hellwig 
---
 kernel/dma/swiotlb.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 93737d0932fbf2..87d06ddf4753f3 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -71,7 +71,7 @@ struct io_tlb_mem *io_tlb_default_mem;
  */
 static unsigned int max_segment;
 
-static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
+static unsigned long swiotlb_default_size = IO_TLB_DEFAULT_SIZE;
 
 static int __init
 setup_io_tlb_npages(char *str)
@@ -106,7 +106,7 @@ void swiotlb_set_max_segment(unsigned int val)
 
 unsigned long swiotlb_size_or_default(void)
 {
-   return default_nslabs << IO_TLB_SHIFT;
+   return swiotlb_default_size;
 }
 
 void __init swiotlb_adjust_size(unsigned long size)
@@ -116,9 +116,9 @@ void __init swiotlb_adjust_size(unsigned long size)
 * architectures such as those supporting memory encryption to
 * adjust/expand SWIOTLB size for their use.
 */
-   size = ALIGN(size, IO_TLB_SIZE);
-   default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
-   pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
+   swiotlb_default_size = ALIGN(size, IO_TLB_SEGSIZE << IO_TLB_SHIFT);
+   pr_info("SWIOTLB bounce buffer size adjusted to %luMB",
+   swiotlb_default_size >> 20);
 }
 
 void swiotlb_print_info(void)
-- 
2.30.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 4/7] powerpc/pseries: simplify svm_swiotlb_init

2021-04-22 Thread Christoph Hellwig
The value returned by swiotlb_size_or_default is always properly
aligned now, so don't duplicate the work.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/platforms/pseries/svm.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/svm.c 
b/arch/powerpc/platforms/pseries/svm.c
index 4d281ff56ce96f..9187d2a1ed568d 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -43,20 +43,14 @@ machine_early_initcall(pseries, init_svm);
  */
 void __init svm_swiotlb_init(void)
 {
+   unsigned long bytes = swiotlb_size_or_default();
unsigned char *vstart;
-   unsigned long bytes, io_tlb_nslabs;
-
-   io_tlb_nslabs = (swiotlb_size_or_default() >> IO_TLB_SHIFT);
-   io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-
-   bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE);
if (vstart && !swiotlb_init_with_tbl(vstart, bytes, false))
return;
 
-   memblock_free_early(__pa(vstart),
-   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+   memblock_free_early(__pa(vstart), PAGE_ALIGN(bytes));
panic("SVM: Cannot allocate SWIOTLB buffer");
 }
 
-- 
2.30.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


cleanup and fix swiotlb sizing

2021-04-22 Thread Christoph Hellwig
Hi all,

based on a report from Tom that overriding the default sizing provided
by the x86 SEV code on the command line doesn't work anymore, this series
cleans up how we handle default and command line sizes for the swiotlb
buffer and then fixes the recently introduced bug in a straight-forward
way.

Diffstat:
 arch/mips/cavium-octeon/dma-octeon.c  |   16 +--
 arch/mips/include/asm/octeon/pci-octeon.h |1 
 arch/mips/pci/pci-octeon.c|2 -
 arch/powerpc/platforms/pseries/svm.c  |   13 ++--
 drivers/xen/swiotlb-xen.c |2 -
 include/linux/swiotlb.h   |2 -
 kernel/dma/swiotlb.c  |   32 +++---
 7 files changed, 25 insertions(+), 43 deletions(-)
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/7] swiotlb: use swiotlb_adjust_size in setup_io_tlb_npages

2021-04-22 Thread Christoph Hellwig
Use the proper helper to do the proper alignment of the buffer size
to the requirements of the swiotlb allocator instead of open coding
the logic.

Signed-off-by: Christoph Hellwig 
---
 kernel/dma/swiotlb.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 27461fd63e0330..93737d0932fbf2 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -76,11 +76,9 @@ static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> 
IO_TLB_SHIFT;
 static int __init
 setup_io_tlb_npages(char *str)
 {
-   if (isdigit(*str)) {
-   /* avoid tail segment of size < IO_TLB_SEGSIZE */
-   default_nslabs =
-   ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE);
-   }
+   if (isdigit(*str))
+   swiotlb_adjust_size(
+   simple_strtoul(str, &str, 0) << IO_TLB_SHIFT);
if (*str == ',')
++str;
if (!strcmp(str, "force"))
-- 
2.30.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/7] swiotlb: pass bytes instead of nslabs to swiotlb_init_with_tbl

2021-04-22 Thread Christoph Hellwig
Pass the actual allocation size to swiotlb_init_with_tbl, which
simplifies things quite a bit.

Signed-off-by: Christoph Hellwig 
---
 arch/mips/cavium-octeon/dma-octeon.c |  2 +-
 arch/powerpc/platforms/pseries/svm.c |  3 +--
 drivers/xen/swiotlb-xen.c|  2 +-
 include/linux/swiotlb.h  |  2 +-
 kernel/dma/swiotlb.c | 10 +-
 5 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/arch/mips/cavium-octeon/dma-octeon.c 
b/arch/mips/cavium-octeon/dma-octeon.c
index df70308db0e697..020b8ce5b8ff7c 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -245,6 +245,6 @@ void __init plat_swiotlb_setup(void)
panic("%s: Failed to allocate %zu bytes align=%lx\n",
  __func__, swiotlbsize, PAGE_SIZE);
 
-   if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1) == -ENOMEM)
+   if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlbsize, 1) == -ENOMEM)
panic("Cannot allocate SWIOTLB buffer");
 }
diff --git a/arch/powerpc/platforms/pseries/svm.c 
b/arch/powerpc/platforms/pseries/svm.c
index 1d829e257996fb..4d281ff56ce96f 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -52,10 +52,9 @@ void __init svm_swiotlb_init(void)
bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE);
-   if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
+   if (vstart && !swiotlb_init_with_tbl(vstart, bytes, false))
return;
 
-
memblock_free_early(__pa(vstart),
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
panic("SVM: Cannot allocate SWIOTLB buffer");
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 4c89afc0df6289..18d79f07b507ce 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -249,7 +249,7 @@ void __init xen_swiotlb_init_early(void)
panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc);
}
 
-   if (swiotlb_init_with_tbl(start, nslabs, false))
+   if (swiotlb_init_with_tbl(start, bytes, false))
panic("Cannot allocate SWIOTLB buffer");
swiotlb_set_max_segment(PAGE_SIZE);
 }
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 216854a5e5134b..d1d40ca5014b54 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -36,7 +36,7 @@ enum swiotlb_force {
 #define IO_TLB_DEFAULT_SIZE (64UL<<20)
 
 extern void swiotlb_init(int verbose);
-int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
+int swiotlb_init_with_tbl(char *tlb, size_t bytes, int verbose);
 unsigned long swiotlb_size_or_default(void);
 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
 extern int swiotlb_late_init_with_default_size(size_t default_size);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 0a5b6f7e75bce6..c7b3dd86db7f56 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -166,9 +166,9 @@ void __init swiotlb_update_mem_attributes(void)
memset(vaddr, 0, bytes);
 }
 
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+int __init swiotlb_init_with_tbl(char *tlb, size_t bytes, int verbose)
 {
-   unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
+   unsigned long nslabs = bytes >> IO_TLB_SHIFT, i;
struct io_tlb_mem *mem;
size_t alloc_size;
 
@@ -209,17 +209,17 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long 
nslabs, int verbose)
 void  __init
 swiotlb_init(int verbose)
 {
-   size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
+   size_t bytes = default_nslabs << IO_TLB_SHIFT;
void *tlb;
 
if (swiotlb_force == SWIOTLB_NO_FORCE)
return;
 
/* Get IO TLB memory from the low pages */
-   tlb = memblock_alloc_low(bytes, PAGE_SIZE);
+   tlb = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
if (!tlb)
goto fail;
-   if (swiotlb_init_with_tbl(tlb, default_nslabs, verbose))
+   if (swiotlb_init_with_tbl(tlb, bytes, verbose))
goto fail_free_mem;
return;
 
-- 
2.30.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu