On 4/29/22 4:09 AM, Joao Martins wrote:
The AMD implementation of unmap_read_dirty() is pretty simple, as it
mostly reuses the unmap code with the extra addition of marshalling
the dirty bit into the bitmap as it walks the to-be-unmapped
IOPTEs.

Extra care is taken, though, to switch over to cmpxchg (as opposed
to a non-serialized store to the PTE) and to only consider the dirty
bit from the value that cmpxchg successfully cleared to 0.

Signed-off-by: Joao Martins <[email protected]>
---
  drivers/iommu/amd/io_pgtable.c | 44 +++++++++++++++++++++++++++++-----
  drivers/iommu/amd/iommu.c      | 22 +++++++++++++++++
  2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 8325ef193093..1868c3b58e6d 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -355,6 +355,16 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
        free_sub_pt(pt, mode, freelist);
  }
+static bool free_pte_dirty(u64 *pte, u64 pteval)

Nitpick: since we free the PTE and clear the dirty bit, should we
rename the function to free_clear_pte_dirty()?

+{
+       bool dirty = false;
+
+       while (IOMMU_PTE_DIRTY(cmpxchg64(pte, pteval, 0)))

We should use 0ULL instead of 0.

+               dirty = true;
+
+       return dirty;
+}
+

Actually, what do you think about enhancing the current free_clear_pte()
to also handle the dirty check?

  /*
   * Generic mapping functions. It maps a physical address into a DMA
   * address space. It allocates the page table pages if necessary.
@@ -428,10 +438,11 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
        return ret;
  }
-static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
-                                     unsigned long iova,
-                                     size_t size,
-                                     struct iommu_iotlb_gather *gather)
+static unsigned long __iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+                                          unsigned long iova,
+                                          size_t size,
+                                          struct iommu_iotlb_gather *gather,
+                                          struct iommu_dirty_bitmap *dirty)
  {
        struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
        unsigned long long unmapped;
@@ -445,11 +456,15 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
        while (unmapped < size) {
                pte = fetch_pte(pgtable, iova, &unmap_size);
                if (pte) {
-                       int i, count;
+                       unsigned long i, count;
+                       bool pte_dirty = false;
                        count = PAGE_SIZE_PTE_COUNT(unmap_size);
                        for (i = 0; i < count; i++)
-                               pte[i] = 0ULL;
+                               pte_dirty |= free_pte_dirty(&pte[i], pte[i]);
+

Actually, what if we change the existing free_clear_pte() to
free_and_clear_dirty_pte() and incorporate the dirty-bit handling there?
Something along these lines is what I have in mind.
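
(Rough sketch only, not compile-tested; it keeps the body of the current
free_clear_pte(), adds the dirty reporting on top, and uses 0ULL as noted
above. The name is just illustrative.)

static bool free_and_clear_dirty_pte(u64 *pte, u64 pteval,
				     struct list_head *freelist)
{
	bool dirty;
	u64 *pt;
	int mode;

	/* Same serialized clear as in free_clear_pte(), just with 0ULL */
	while (cmpxchg64(pte, pteval, 0ULL) != pteval)
		pteval = *pte;

	/* The value we actually cleared tells us whether the IOVA was written */
	dirty = !!IOMMU_PTE_DIRTY(pteval);

	if (!IOMMU_PTE_PRESENT(pteval))
		return dirty;

	/* Unchanged from free_clear_pte() */
	pt   = IOMMU_PTE_PAGE(pteval);
	mode = IOMMU_PTE_MODE(pteval);

	free_sub_pt(pt, mode, freelist);

	return dirty;
}

The unmap loop could then accumulate the return value the same way the
patch does with free_pte_dirty().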

...
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 0a86392b2367..a8fcb6e9a684 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2144,6 +2144,27 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
        return r;
  }
+static size_t amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
+                                        unsigned long iova, size_t page_size,
+                                        struct iommu_iotlb_gather *gather,
+                                        struct iommu_dirty_bitmap *dirty)
+{
+       struct protection_domain *domain = to_pdomain(dom);
+       struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+       size_t r;
+
+       if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+           (domain->iop.mode == PAGE_MODE_NONE))
+               return 0;
+
+       r = (ops->unmap_read_dirty) ?
+               ops->unmap_read_dirty(ops, iova, page_size, gather, dirty) : 0;
+
+       amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
+
+       return r;
+}
+

Instead of creating a new function, what if we enhance the current
amd_iommu_unmap() to also handle the read-dirty part (e.g. via a common
__amd_iommu_unmap_read_dirty() helper), and then have both
amd_iommu_unmap() and amd_iommu_unmap_read_dirty() call
__amd_iommu_unmap_read_dirty()? Something like the sketch below.
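
(Untested sketch, just to illustrate the idea: I'm assuming the existing
body of amd_iommu_unmap() moves into the helper unchanged, and that a
NULL dirty bitmap means the plain unmap case.)

static size_t __amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
					   unsigned long iova, size_t page_size,
					   struct iommu_iotlb_gather *gather,
					   struct iommu_dirty_bitmap *dirty)
{
	struct protection_domain *domain = to_pdomain(dom);
	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
	size_t r;

	if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
	    (domain->iop.mode == PAGE_MODE_NONE))
		return 0;

	/* Only the page-table walk differs between the two paths */
	if (dirty)
		r = ops->unmap_read_dirty ?
			ops->unmap_read_dirty(ops, iova, page_size, gather, dirty) : 0;
	else
		r = ops->unmap ?
			ops->unmap(ops, iova, page_size, gather) : 0;

	amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);

	return r;
}

static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
			      size_t page_size, struct iommu_iotlb_gather *gather)
{
	return __amd_iommu_unmap_read_dirty(dom, iova, page_size, gather, NULL);
}

static size_t amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
					 unsigned long iova, size_t page_size,
					 struct iommu_iotlb_gather *gather,
					 struct iommu_dirty_bitmap *dirty)
{
	return __amd_iommu_unmap_read_dirty(dom, iova, page_size, gather, dirty);
}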

Best Regards,
Suravee
_______________________________________________
iommu mailing list
[email protected]
https://lists.linuxfoundation.org/mailman/listinfo/iommu
