Re: [PATCH RFC 10/19] iommu/amd: Add unmap_read_dirty() support

2022-05-31 Thread Joao Martins
On 5/31/22 13:39, Suravee Suthikulpanit wrote:
> On 4/29/22 4:09 AM, Joao Martins wrote:
>> The AMD implementation of unmap_read_dirty() is pretty simple, as it
>> mostly reuses the unmap code with the extra addition of marshalling
>> the dirty bit into the bitmap as it walks the to-be-unmapped IOPTEs.
>>
>> Extra care is taken, though, to switch over to cmpxchg as opposed to a
>> non-serialized store to the PTE, and to keep testing whether the dirty
>> bit is set until the cmpxchg to 0 succeeds.
>>
>> Signed-off-by: Joao Martins 
>> ---
>>   drivers/iommu/amd/io_pgtable.c | 44 +-
>>   drivers/iommu/amd/iommu.c  | 22 +
>>   2 files changed, 60 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
>> index 8325ef193093..1868c3b58e6d 100644
>> --- a/drivers/iommu/amd/io_pgtable.c
>> +++ b/drivers/iommu/amd/io_pgtable.c
>> @@ -355,6 +355,16 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
>>  free_sub_pt(pt, mode, freelist);
>>   }
>>   
>> +static bool free_pte_dirty(u64 *pte, u64 pteval)
> 
> Nitpick: Since we free the entry and clear the dirty bit, should we
> change the function name to free_clear_pte_dirty()?
> 

We free and *read* the dirty bit. It just so happens that we clear the
dirty bit, and every other bit, in the process. To be clear, I am not
clearing the dirty bit explicitly (unlike read_and_clear_dirty()).

>> +{
>> +bool dirty = false;
>> +
>> +while (IOMMU_PTE_DIRTY(cmpxchg64(pte, pteval, 0)))
> 
> We should use 0ULL instead of 0.
>

ack.
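(i.e. the adjusted loop would simply become:

	while (IOMMU_PTE_DIRTY(cmpxchg64(pte, pteval, 0ULL)))
		dirty = true;

with no other change intended.)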

>> +dirty = true;
>> +
>> +return dirty;
>> +}
>> +
> 
> Actually, what do you think about enhancing the current free_clear_pte()
> to also handle the dirty-bit check?
> 
See further below, about dropping this patch.

>>   /*
>>* Generic mapping functions. It maps a physical address into a DMA
>>* address space. It allocates the page table pages if necessary.
>> @@ -428,10 +438,11 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
>>  return ret;
>>   }
>>   
>> -static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
>> -  unsigned long iova,
>> -  size_t size,
>> -  struct iommu_iotlb_gather *gather)
>> +static unsigned long __iommu_v1_unmap_page(struct io_pgtable_ops *ops,
>> +   unsigned long iova,
>> +   size_t size,
>> +   struct iommu_iotlb_gather *gather,
>> +   struct iommu_dirty_bitmap *dirty)
>>   {
>>  struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
>>  unsigned long long unmapped;
>> @@ -445,11 +456,15 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
>>  while (unmapped < size) {
>>  pte = fetch_pte(pgtable, iova, &unmap_size);
>>  if (pte) {
>> -int i, count;
>> +unsigned long i, count;
>> +bool pte_dirty = false;
>>   
>>  count = PAGE_SIZE_PTE_COUNT(unmap_size);
>>  for (i = 0; i < count; i++)
>> -pte[i] = 0ULL;
>> +pte_dirty |= free_pte_dirty(&pte[i], pte[i]);
>> +
> 
> Actually, what if we change the existing free_clear_pte() to
> free_and_clear_dirty_pte(), and incorporate the dirty-bit logic there?
Likewise, but otherwise it would be a good idea.
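For illustration only, a merged helper along those lines might look
roughly like the sketch below. This is hypothetical and not part of the
posted patch: it just folds the posted free_pte_dirty() loop into the
existing free_clear_pte() body (assuming that body -- the PTE-present
check, the IOMMU_PTE_PAGE()/IOMMU_PTE_MODE() lookup and the free_sub_pt()
call -- stays as it is today), and the name is the one floated above.

/*
 * Hypothetical sketch -- not from the posted patch. Zero the PTE while
 * remembering whether its dirty bit was observed set, then free any
 * lower-level table it pointed to, as free_clear_pte() already does.
 */
static bool free_clear_pte_dirty(u64 *pte, u64 pteval,
                                 struct list_head *freelist)
{
        bool dirty = false;
        u64 *pt;
        int mode;

        /* Same loop as the posted free_pte_dirty(). */
        while (IOMMU_PTE_DIRTY(cmpxchg64(pte, pteval, 0ULL)))
                dirty = true;

        if (!IOMMU_PTE_PRESENT(pteval))
                return dirty;

        pt   = IOMMU_PTE_PAGE(pteval);
        mode = IOMMU_PTE_MODE(pteval);
        free_sub_pt(pt, mode, freelist);

        return dirty;
}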

>> ...
>> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
>> index 0a86392b2367..a8fcb6e9a684 100644
>> --- a/drivers/iommu/amd/iommu.c
>> +++ b/drivers/iommu/amd/iommu.c
>> @@ -2144,6 +2144,27 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
>>  return r;
>>   }
>>   
>> +static size_t amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
>> + unsigned long iova, size_t page_size,
>> + struct iommu_iotlb_gather *gather,
>> + struct iommu_dirty_bitmap *dirty)
>> +{
>> +struct protection_domain *domain = to_pdomain(dom);
>> +struct io_pgtable_ops *ops = &domain->iop.iop.ops;
>> +size_t r;
>> +
>> +if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
>> +(domain->iop.mode == PAGE_MODE_NONE))
>> +return 0;
>> +
>> +r = (ops->unmap_read_dirty) ?
>> +ops->unmap_read_dirty(ops, iova, page_size, gather, dirty) : 0;
>> +
>> +amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
>> +
>> +return r;
>> +}
>> +
> 
> Instead of creating a new function, what if we enhance the current
> amd_iommu_unmap() to also handle the read-dirty part as well
> (e.g. __amd_iommu_unmap_read_dirty()), and then both amd_iommu_unmap()
> and amd_iommu_unmap_read_dirty() can call __amd_iommu_unmap_read_dirty()?

Re: [PATCH RFC 10/19] iommu/amd: Add unmap_read_dirty() support

2022-05-31 Thread Suravee Suthikulpanit via iommu




On 4/29/22 4:09 AM, Joao Martins wrote:

The AMD implementation of unmap_read_dirty() is pretty simple, as it
mostly reuses the unmap code with the extra addition of marshalling
the dirty bit into the bitmap as it walks the to-be-unmapped IOPTEs.

Extra care is taken, though, to switch over to cmpxchg as opposed to a
non-serialized store to the PTE, and to keep testing whether the dirty
bit is set until the cmpxchg to 0 succeeds.

Signed-off-by: Joao Martins 
---
  drivers/iommu/amd/io_pgtable.c | 44 +-
  drivers/iommu/amd/iommu.c  | 22 +
  2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 8325ef193093..1868c3b58e6d 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -355,6 +355,16 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
free_sub_pt(pt, mode, freelist);
  }
  
+static bool free_pte_dirty(u64 *pte, u64 pteval)


Nitpick: Since we free the entry and clear the dirty bit, should we change
the function name to free_clear_pte_dirty()?


+{
+   bool dirty = false;
+
+   while (IOMMU_PTE_DIRTY(cmpxchg64(pte, pteval, 0)))


We should use 0ULL instead of 0.


+   dirty = true;
+
+   return dirty;
+}
+


Actually, what do you think about enhancing the current free_clear_pte()
to also handle the dirty-bit check?


  /*
   * Generic mapping functions. It maps a physical address into a DMA
   * address space. It allocates the page table pages if necessary.
@@ -428,10 +438,11 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
return ret;
  }
  
-static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
- unsigned long iova,
- size_t size,
- struct iommu_iotlb_gather *gather)
+static unsigned long __iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+  unsigned long iova,
+  size_t size,
+  struct iommu_iotlb_gather *gather,
+  struct iommu_dirty_bitmap *dirty)
  {
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long long unmapped;
@@ -445,11 +456,15 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
	while (unmapped < size) {
		pte = fetch_pte(pgtable, iova, &unmap_size);
if (pte) {
-   int i, count;
+   unsigned long i, count;
+   bool pte_dirty = false;
  
			count = PAGE_SIZE_PTE_COUNT(unmap_size);
			for (i = 0; i < count; i++)
-   pte[i] = 0ULL;
+   pte_dirty |= free_pte_dirty(&pte[i], pte[i]);
+


Actually, what if we change the existing free_clear_pte() to
free_and_clear_dirty_pte(), and incorporate the dirty-bit logic there?


...
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 0a86392b2367..a8fcb6e9a684 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2144,6 +2144,27 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
return r;
  }
  
+static size_t amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
+unsigned long iova, size_t page_size,
+struct iommu_iotlb_gather *gather,
+struct iommu_dirty_bitmap *dirty)
+{
+   struct protection_domain *domain = to_pdomain(dom);
+   struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+   size_t r;
+
+   if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+   (domain->iop.mode == PAGE_MODE_NONE))
+   return 0;
+
+   r = (ops->unmap_read_dirty) ?
+   ops->unmap_read_dirty(ops, iova, page_size, gather, dirty) : 0;
+
+   amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
+
+   return r;
+}
+


Instead of creating a new function, what if we enhance the current
amd_iommu_unmap() to also handle the read-dirty part as well
(e.g. __amd_iommu_unmap_read_dirty()), and then both amd_iommu_unmap()
and amd_iommu_unmap_read_dirty() can call __amd_iommu_unmap_read_dirty()?
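For illustration, such a refactor might look roughly like the sketch
below. This is hypothetical and not from the posted patch: the common
helper keeps the posted behaviour by only using ops->unmap_read_dirty
when a dirty bitmap is supplied, and falling back to ops->unmap
otherwise; names mirror the ones suggested above.

/*
 * Hypothetical sketch of the suggested refactor -- not part of the
 * posted patch. Both entry points funnel into one common helper; the
 * plain unmap path passes a NULL dirty bitmap.
 */
static size_t __amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
                                           unsigned long iova, size_t page_size,
                                           struct iommu_iotlb_gather *gather,
                                           struct iommu_dirty_bitmap *dirty)
{
        struct protection_domain *domain = to_pdomain(dom);
        struct io_pgtable_ops *ops = &domain->iop.iop.ops;
        size_t r = 0;

        if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
            (domain->iop.mode == PAGE_MODE_NONE))
                return 0;

        /* Only take the read-dirty path when a bitmap was supplied. */
        if (dirty && ops->unmap_read_dirty)
                r = ops->unmap_read_dirty(ops, iova, page_size, gather, dirty);
        else if (ops->unmap)
                r = ops->unmap(ops, iova, page_size, gather);

        amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);

        return r;
}

static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
                              size_t page_size, struct iommu_iotlb_gather *gather)
{
        return __amd_iommu_unmap_read_dirty(dom, iova, page_size, gather, NULL);
}

static size_t amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
                                         unsigned long iova, size_t page_size,
                                         struct iommu_iotlb_gather *gather,
                                         struct iommu_dirty_bitmap *dirty)
{
        return __amd_iommu_unmap_read_dirty(dom, iova, page_size, gather, dirty);
}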

Best Regards,
Suravee


[PATCH RFC 10/19] iommu/amd: Add unmap_read_dirty() support

2022-04-28 Thread Joao Martins
The AMD implementation of unmap_read_dirty() is pretty simple, as it
mostly reuses the unmap code with the extra addition of marshalling
the dirty bit into the bitmap as it walks the to-be-unmapped IOPTEs.

Extra care is taken, though, to switch over to cmpxchg as opposed to a
non-serialized store to the PTE, and to keep testing whether the dirty
bit is set until the cmpxchg to 0 succeeds.

Signed-off-by: Joao Martins 
---
 drivers/iommu/amd/io_pgtable.c | 44 +-
 drivers/iommu/amd/iommu.c  | 22 +
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 8325ef193093..1868c3b58e6d 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -355,6 +355,16 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
free_sub_pt(pt, mode, freelist);
 }
 
+static bool free_pte_dirty(u64 *pte, u64 pteval)
+{
+   bool dirty = false;
+
+   while (IOMMU_PTE_DIRTY(cmpxchg64(pte, pteval, 0)))
+   dirty = true;
+
+   return dirty;
+}
+
 /*
  * Generic mapping functions. It maps a physical address into a DMA
  * address space. It allocates the page table pages if necessary.
@@ -428,10 +438,11 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
return ret;
 }
 
-static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
- unsigned long iova,
- size_t size,
- struct iommu_iotlb_gather *gather)
+static unsigned long __iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+  unsigned long iova,
+  size_t size,
+  struct iommu_iotlb_gather *gather,
+  struct iommu_dirty_bitmap *dirty)
 {
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long long unmapped;
@@ -445,11 +456,15 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
	while (unmapped < size) {
		pte = fetch_pte(pgtable, iova, &unmap_size);
if (pte) {
-   int i, count;
+   unsigned long i, count;
+   bool pte_dirty = false;
 
count = PAGE_SIZE_PTE_COUNT(unmap_size);
for (i = 0; i < count; i++)
-   pte[i] = 0ULL;
+   pte_dirty |= free_pte_dirty(&pte[i], pte[i]);
+
+   if (unlikely(pte_dirty && dirty))
+   iommu_dirty_bitmap_record(dirty, iova, unmap_size);
}
 
iova = (iova & ~(unmap_size - 1)) + unmap_size;
@@ -461,6 +476,22 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
return unmapped;
 }
 
+static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+unsigned long iova,
+size_t size,
+struct iommu_iotlb_gather *gather)
+{
+   return __iommu_v1_unmap_page(ops, iova, size, gather, NULL);
+}
+
+static unsigned long iommu_v1_unmap_page_read_dirty(struct io_pgtable_ops *ops,
+   unsigned long iova, size_t size,
+   struct iommu_iotlb_gather *gather,
+   struct iommu_dirty_bitmap *dirty)
+{
+   return __iommu_v1_unmap_page(ops, iova, size, gather, dirty);
+}
+
static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
 {
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
@@ -575,6 +606,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
pgtable->iop.ops.unmap= iommu_v1_unmap_page;
pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
+   pgtable->iop.ops.unmap_read_dirty = iommu_v1_unmap_page_read_dirty;
 
	return &pgtable->iop;
 }
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 0a86392b2367..a8fcb6e9a684 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2144,6 +2144,27 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
return r;
 }
 
+static size_t amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
+unsigned long iova, size_t page_size,
+struct iommu_iotlb_gather *gather,
+struct iommu_dirty_bitmap *dirty)
+{
+   struct protection_domain *domain = to_pdomain(dom);
+   struct io_pgtable_ops *ops = >iop.iop.ops;
+