Jason,

On 9/3/2025 11:16 PM, Jason Gunthorpe wrote:
> From: Alejandro Jimenez <[email protected]>
> 
> Replace the io_pgtable versions with pt_iommu versions. The v2 page table
> uses the x86 implementation that will be eventually shared with VT-D.
> 
> This supports the same special features as the original code:
>  - increase_top for the v1 format to allow scaling from 3 to 6 levels
>  - non-present flushing
>  - Dirty tracking for v1 only
>  - __sme_set() to adjust the PTEs for CC
>  - Optimization for flushing with virtualization to minimize the range
>  - amd_iommu_pgsize_bitmap override of the native page sizes
>  - page tables allocate from the device's NUMA node
> 
> Rework the domain ops so that v1/v2 get their own ops. Make dedicated
> allocation functions for v1 and v2. Hook up invalidation for a top change
> to struct pt_iommu_flush_ops. Delete some of the iopgtable related code
> that becomes unused in this patch. The next patch will delete the rest of
> it.
> 
> This fixes a race bug in AMD's increase_address_space() implementation. It
> stores the top level and top pointer in different memory, which prevents
> other threads from reading a coherent version:
> 
>    increase_address_space()   alloc_pte()
>                                 level = pgtable->mode - 1;
>       pgtable->root  = pte;
>       pgtable->mode += 1;
>                                 pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
> 
> The iommupt version is careful to put mode and root under a single
> READ_ONCE and then is careful to only READ_ONCE a single time per
> walk.
> 
> Signed-off-by: Alejandro Jimenez <[email protected]>
> Tested-by: Alejandro Jimenez <[email protected]>
> Signed-off-by: Jason Gunthorpe <[email protected]>
> ---
>  drivers/iommu/amd/Kconfig           |   5 +-
>  drivers/iommu/amd/amd_iommu.h       |   1 -
>  drivers/iommu/amd/amd_iommu_types.h |  12 +-
>  drivers/iommu/amd/io_pgtable.c      |   1 -
>  drivers/iommu/amd/iommu.c           | 538 ++++++++++++++--------------
>  5 files changed, 282 insertions(+), 275 deletions(-)
> 
> diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
> index ecef69c11144db..f2acf471cb5d9f 100644
> --- a/drivers/iommu/amd/Kconfig
> +++ b/drivers/iommu/amd/Kconfig
> @@ -11,10 +11,13 @@ config AMD_IOMMU
>       select MMU_NOTIFIER
>       select IOMMU_API
>       select IOMMU_IOVA
> -     select IOMMU_IO_PGTABLE
>       select IOMMU_SVA
>       select IOMMU_IOPF
>       select IOMMUFD_DRIVER if IOMMUFD
> +     select GENERIC_PT
> +     select IOMMU_PT
> +     select IOMMU_PT_AMDV1
> +     select IOMMU_PT_X86_64
>       depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
>       help
>         With this option you can enable support for AMD IOMMU hardware in
> diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
> index 9b4b589a54b57e..25044d28f28a8d 100644
> --- a/drivers/iommu/amd/amd_iommu.h
> +++ b/drivers/iommu/amd/amd_iommu.h
> @@ -88,7 +88,6 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag);
>   * the IOMMU used by this driver.
>   */
>  void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
> -void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
>  void amd_iommu_domain_flush_pages(struct protection_domain *domain,
>                                 u64 address, size_t size);
>  void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
> diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
> index 5219d7ddfdaa8b..90cf2bc4e14528 100644
> --- a/drivers/iommu/amd/amd_iommu_types.h
> +++ b/drivers/iommu/amd/amd_iommu_types.h
> @@ -19,6 +19,7 @@
>  #include <linux/pci.h>
>  #include <linux/irqreturn.h>
>  #include <linux/io-pgtable.h>
> +#include <linux/generic_pt/iommu.h>
>  

.../...

>  }
>  
>  static void set_dte_entry(struct amd_iommu *iommu,
> -                       struct iommu_dev_data *dev_data)
> +                       struct iommu_dev_data *dev_data,
> +                       phys_addr_t top_paddr, unsigned int top_level)
>  {
>       u16 domid;
>       u32 old_domid;
> @@ -2059,19 +2029,36 @@ static void set_dte_entry(struct amd_iommu *iommu,
>       struct protection_domain *domain = dev_data->domain;
>       struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
>       struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
> -
> -     if (gcr3_info && gcr3_info->gcr3_tbl)
> -             domid = dev_data->gcr3_info.domid;
> -     else
> -             domid = domain->id;
> +     struct pt_iommu_amdv1_hw_info pt_info;
>  
>       make_clear_dte(dev_data, dte, &new);
>  
> -     if (domain->iop.mode != PAGE_MODE_NONE)
> -             new.data[0] |= iommu_virt_to_phys(domain->iop.root);
> +     if (gcr3_info && gcr3_info->gcr3_tbl)
> +             domid = dev_data->gcr3_info.domid;
> +     else {
> +             domid = domain->id;
>  
> -     new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
> -                 << DEV_ENTRY_MODE_SHIFT;
> +             if (domain->domain.type & __IOMMU_DOMAIN_PAGING) {
> +                     /*
> +                      * When updating the IO pagetable, the new top and level
> +                      * are provided as parameters. For other operations i.e.
> +                      * device attach, retrieve the current pagetable info
> +                      * via the IOMMU PT API.
> +                      */
> +                     if (top_paddr) {
> +                             pt_info.host_pt_root = top_paddr;
> +                             pt_info.mode = top_level + 1;
> +                     } else {
> +                             WARN_ON(top_paddr || top_level);
> +                             pt_iommu_amdv1_hw_info(&domain->amdv1,
> +                                                    &pt_info);
> +                     }
> +
> +                     new.data[0] |= pt_info.host_pt_root |

When SME is enabled, the C-bit also needs to be set in the root pointer
written to the DTE here, i.e. use __sme_set(pt_info.host_pt_root).

-Vasant


Reply via email to