Re: [RFC PATCH 05/11] iommu/arm-smmu-v3: Merge a span of page to block descriptor

2021-02-07 Thread Keqian Zhu
Hi Robin,

On 2021/2/5 3:52, Robin Murphy wrote:
> On 2021-01-28 15:17, Keqian Zhu wrote:
>> From: jiangkunkun 
>>
>> When stopping dirty log tracking, we need to recover all block descriptors
>> that were split when dirty log tracking started. This adds a new
>> interface named merge_page to the iommu layer, and arm smmuv3 implements it:
>> it reinstalls block mappings and unmaps the span of page mappings.
>>
>> It is the caller's duty to find contiguous physical memory.
>>
>> While pages are being merged, other interfaces are not expected to be in use,
>> so no race condition exists. And we flush all iotlbs after the merge
>> procedure completes to ease the pressure on the iommu, as we will generally
>> merge a huge range of page mappings.
> 
> Again, I think we need better reasoning than "race conditions don't exist 
> because we don't expect them to exist".
Sure, because they can't. ;-)

> 
>> Co-developed-by: Keqian Zhu 
>> Signed-off-by: Kunkun Jiang 
>> ---
>>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 20 ++
>>   drivers/iommu/io-pgtable-arm.c  | 78 +
>>   drivers/iommu/iommu.c   | 75 
>>   include/linux/io-pgtable.h  |  2 +
>>   include/linux/iommu.h   | 10 +++
>>   5 files changed, 185 insertions(+)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> index 5469f4fca820..2434519e4bb6 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> @@ -2529,6 +2529,25 @@ static size_t arm_smmu_split_block(struct iommu_domain *domain,
>>   return ops->split_block(ops, iova, size);
>>   }
[...]

>> +
>> +size_t iommu_merge_page(struct iommu_domain *domain, unsigned long iova,
>> +			size_t size, int prot)
>> +{
>> +	phys_addr_t phys;
>> +	dma_addr_t p, i;
>> +	size_t cont_size, merged_size;
>> +	size_t merged = 0;
>> +
>> +	while (size) {
>> +		phys = iommu_iova_to_phys(domain, iova);
>> +		cont_size = PAGE_SIZE;
>> +		p = phys + cont_size;
>> +		i = iova + cont_size;
>> +
>> +		while (cont_size < size && p == iommu_iova_to_phys(domain, i)) {
>> +			p += PAGE_SIZE;
>> +			i += PAGE_SIZE;
>> +			cont_size += PAGE_SIZE;
>> +		}
>> +
>> +		merged_size = __iommu_merge_page(domain, iova, phys, cont_size,
>> +						 prot);
> 
> This is incredibly silly. The amount of time you'll spend just on walking the 
> tables in all those iova_to_phys() calls is probably significantly more than 
> it would take the low-level pagetable code to do the entire operation for 
> itself. At this level, any knowledge of how mappings are actually constructed 
> is lost once __iommu_map() returns, so we just don't know, and for this 
> operation in particular there seems little point in trying to guess - the 
> driver backend still has to figure out whether something we *think* might be 
> mergeable actually is, so it's better off doing the entire operation in a 
> single pass by itself.
>
> There's especially little point in starting all this work *before* checking 
> that it's even possible...
>
> Robin.

Well, this looks silly indeed. But the iova->phys info is only stored in the 
pgtable, and there seems to be no other way to find contiguous physical 
addresses :-( (actually, vfio_iommu_replay() has similar logic).
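For reference, the pattern I mean looks roughly like this (paraphrased from 
memory rather than quoted exactly, and example_cont_range() is only an 
illustrative name, not existing code):

/*
 * Starting at @iova, keep extending the range while the next page is
 * physically contiguous according to iommu_iova_to_phys(), up to @limit.
 */
static size_t example_cont_range(struct iommu_domain *domain,
				 dma_addr_t iova, size_t limit)
{
	phys_addr_t phys = iommu_iova_to_phys(domain, iova);
	size_t size = PAGE_SIZE;

	while (size < limit &&
	       phys + size == iommu_iova_to_phys(domain, iova + size))
		size += PAGE_SIZE;

	return size;
}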

We put the procedure that finds contiguous physical addresses in the common 
iommu layer because this logic is shared by all types of iommu driver.

If a vendor iommu driver decides that (iova, phys, cont_size) is not mergeable, 
it can make its own decision about how to map that range. This is consistent 
with iommu_map(), which provides (iova, paddr, pgsize) to the vendor driver, 
and the vendor driver then makes its own decision about how to map it.
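If instead the whole operation moves down into the driver, I guess you mean 
something roughly like the sketch below (hypothetical and untested; this 
merge_page callback signature is not the one in this patch, and the paddr 
argument is dropped because the pagetable code would discover physical 
contiguity itself while it walks):

size_t iommu_merge_page(struct iommu_domain *domain, unsigned long iova,
			size_t size, int prot)
{
	const struct iommu_ops *ops = domain->ops;

	if (!ops || !ops->merge_page)
		return 0;

	/* One call per range; the driver walks its own tables once. */
	return ops->merge_page(domain, iova, size, prot);
}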

Do I understand your idea correctly?

Thanks,
Keqian
> 
>> +		iova += merged_size;
>> +		size -= merged_size;
>> +		merged += merged_size;
>> +
>> +		if (merged_size != cont_size)
>> +			break;
>> +	}
>> +	iommu_flush_iotlb_all(domain);
>> +
>> +	return merged;
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_merge_page);
>> +
>>   void iommu_get_resv_regions(struct device *dev, struct list_head *list)
>>   {
>>   const struct iommu_ops *ops = dev->bus->iommu_ops;
>> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
>> index b87c6f4ecaa2..754b62a1bbaf 100644
>> --- a/include/linux/io-pgtable.h
>> +++ b/include/linux/io-pgtable.h
>> @@ -164,6 +164,8 @@ struct io_pgtable_ops {
>>   unsigned long iova);
>>   size_t (*split_block)(struct io_pgtable_ops *ops, unsigned long iova,
>> size_t size);
>> +size_t (*merge_page)(struct io_pgtable_ops *ops, unsigned long iova,
>> + phys_addr_t phys, size_t size, int prot);
>>   };
>> /**
>> diff --git a/include/linux/iommu.h 

Re: [RFC PATCH 05/11] iommu/arm-smmu-v3: Merge a span of page to block descriptor

2021-02-04 Thread Robin Murphy

On 2021-01-28 15:17, Keqian Zhu wrote:

From: jiangkunkun 

When stopping dirty log tracking, we need to recover all block descriptors
that were split when dirty log tracking started. This adds a new
interface named merge_page to the iommu layer, and arm smmuv3 implements it:
it reinstalls block mappings and unmaps the span of page mappings.

It is the caller's duty to find contiguous physical memory.

While pages are being merged, other interfaces are not expected to be in use,
so no race condition exists. And we flush all iotlbs after the merge
procedure completes to ease the pressure on the iommu, as we will generally
merge a huge range of page mappings.


Again, I think we need better reasoning than "race conditions don't 
exist because we don't expect them to exist".



Co-developed-by: Keqian Zhu 
Signed-off-by: Kunkun Jiang 
---
  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 20 ++
  drivers/iommu/io-pgtable-arm.c  | 78 +
  drivers/iommu/iommu.c   | 75 
  include/linux/io-pgtable.h  |  2 +
  include/linux/iommu.h   | 10 +++
  5 files changed, 185 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 5469f4fca820..2434519e4bb6 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2529,6 +2529,25 @@ static size_t arm_smmu_split_block(struct iommu_domain *domain,
return ops->split_block(ops, iova, size);
  }
  
+static size_t arm_smmu_merge_page(struct iommu_domain *domain, unsigned long iova,
+				  phys_addr_t paddr, size_t size, int prot)
+{
+	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+
+	if (!(smmu->features & (ARM_SMMU_FEAT_BBML1 | ARM_SMMU_FEAT_BBML2))) {
+		dev_err(smmu->dev, "don't support BBML1/2 and merge page\n");
+		return 0;
+	}
+
+	if (!ops || !ops->merge_page) {
+		pr_err("don't support merge page\n");
+		return 0;
+	}
+
+	return ops->merge_page(ops, iova, paddr, size, prot);
+}
+
  static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
  {
return iommu_fwspec_add_ids(dev, args->args, 1);
@@ -2629,6 +2648,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_get_attr= arm_smmu_domain_get_attr,
.domain_set_attr= arm_smmu_domain_set_attr,
.split_block= arm_smmu_split_block,
+   .merge_page = arm_smmu_merge_page,
.of_xlate   = arm_smmu_of_xlate,
.get_resv_regions   = arm_smmu_get_resv_regions,
.put_resv_regions   = generic_iommu_put_resv_regions,
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index f3b7f7115e38..17390f258eb1 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -800,6 +800,83 @@ static size_t arm_lpae_split_block(struct io_pgtable_ops *ops,
return __arm_lpae_split_block(data, iova, size, lvl, ptep);
  }
  
+static size_t __arm_lpae_merge_page(struct arm_lpae_io_pgtable *data,
+				    unsigned long iova, phys_addr_t paddr,
+				    size_t size, int lvl, arm_lpae_iopte *ptep,
+				    arm_lpae_iopte prot)
+{
+	arm_lpae_iopte pte, *tablep;
+	struct io_pgtable *iop = &data->iop;
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+
+	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
+		return 0;
+
+	ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+	pte = READ_ONCE(*ptep);
+	if (WARN_ON(!pte))
+		return 0;
+
+	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
+		if (iopte_leaf(pte, lvl, iop->fmt))
+			return size;
+
+		/* Race does not exist */
+		if (cfg->bbml == 1) {
+			prot |= ARM_LPAE_PTE_NT;
+			__arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
+			io_pgtable_tlb_flush_walk(iop, iova, size,
+						  ARM_LPAE_GRANULE(data));
+
+			prot &= ~(ARM_LPAE_PTE_NT);
+			__arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
+		} else {
+			__arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
+		}
+
+		tablep = iopte_deref(pte, data);
+		__arm_lpae_free_pgtable(data, lvl + 1, tablep);
+		return size;
+	} else if (iopte_leaf(pte, lvl, iop->fmt)) {
+		/* The size is too small, already merged */
+		return size;
+	}
+
+	/* Keep on walkin */
+	ptep = iopte_deref(pte, data);
+   return