Add page-selective IOTLB flush using HVCALL_FLUSH_DEVICE_DOMAIN_LIST. This hypercall accepts a list of (page_number, page_mask_shift) entries, enabling finer-grained IOTLB invalidation than the domain-wide HVCALL_FLUSH_DEVICE_DOMAIN used by hv_iommu_flush_iotlb_all().
hv_iommu_calc_flush_range() computes the smallest power-of-two aligned range that covers the target IOVA region, producing a single flush descriptor. This may over-flush when the range is not naturally aligned, matching the approach used by Intel VT-d PSI. If the page-selective flush fails, the code falls back to a full domain flush. Signed-off-by: Easwar Hariharan <[email protected]> Signed-off-by: Yu Zhang <[email protected]> --- drivers/iommu/hyperv/iommu.c | 68 +++++++++++++++++++++++++++++++++++- include/hyperv/hvgdk_mini.h | 1 + include/hyperv/hvhdk_mini.h | 17 +++++++++ 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/hyperv/iommu.c b/drivers/iommu/hyperv/iommu.c index 254136946404..e9b104a322fd 100644 --- a/drivers/iommu/hyperv/iommu.c +++ b/drivers/iommu/hyperv/iommu.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "Hyper-V pvIOMMU: " fmt #define dev_fmt(fmt) pr_fmt(fmt) +#include <linux/hyperv.h> #include <linux/iommu.h> #include <linux/pci.h> #include <linux/dma-map-ops.h> @@ -401,10 +402,74 @@ static void hv_iommu_flush_iotlb_all(struct iommu_domain *domain) hv_flush_device_domain(to_hv_iommu_domain(domain)); } +/* + * Calculate the minimal power-of-two aligned range that covers [start, end] + * (end is inclusive). Returns a single (page_number, page_mask_shift) + * descriptor that may over-flush when the range is not naturally aligned. + */ +static void hv_iommu_calc_flush_range(unsigned long start, unsigned long end, + union hv_iommu_flush_va *va) +{ + unsigned long start_pfn = HVPFN_DOWN(start); + unsigned long last_pfn = HVPFN_UP(end + 1) - 1; + unsigned long mask_shift, aligned_pfn; + + if (start_pfn == last_pfn) { + mask_shift = 0; + } else { + /* + * Find the highest bit position where start_pfn and last_pfn + * differ. A range aligned to one above that bit is the + * smallest power-of-two region that covers both endpoints. 
+ */ + mask_shift = __fls(start_pfn ^ last_pfn) + 1; + } + + aligned_pfn = ALIGN_DOWN(start_pfn, 1UL << mask_shift); + va->page_number = aligned_pfn; + va->page_mask_shift = mask_shift; +} + +static void hv_flush_device_domain_list(struct hv_iommu_domain *hv_domain, + struct iommu_iotlb_gather *iotlb_gather) +{ + u64 status; + unsigned long flags; + struct hv_input_flush_device_domain_list *input; + + local_irq_save(flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + memset(input, 0, sizeof(*input)); + + input->device_domain = hv_domain->device_domain; + input->flags |= HV_FLUSH_DEVICE_DOMAIN_LIST_IOMMU_FORMAT; + hv_iommu_calc_flush_range(iotlb_gather->start, iotlb_gather->end, + &input->iova_list[0]); + + status = hv_do_rep_hypercall(HVCALL_FLUSH_DEVICE_DOMAIN_LIST, + 1, 0, input, NULL); + + if (!hv_result_success(status)) { + /* Page-selective flush failed, fall back to full flush. */ + struct hv_input_flush_device_domain *flush_all = (void *)input; + + memset(flush_all, 0, sizeof(*flush_all)); + flush_all->device_domain = hv_domain->device_domain; + status = hv_do_hypercall(HVCALL_FLUSH_DEVICE_DOMAIN, + flush_all, NULL); + WARN(!hv_result_success(status), + "HVCALL_FLUSH_DEVICE_DOMAIN fallback also failed: %lld\n", + status); + } + + local_irq_restore(flags); +} + static void hv_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { - hv_flush_device_domain(to_hv_iommu_domain(domain)); + hv_flush_device_domain_list(to_hv_iommu_domain(domain), iotlb_gather); iommu_put_pages_list(&iotlb_gather->freelist); } @@ -455,6 +520,7 @@ static struct iommu_domain *hv_iommu_domain_alloc_paging(struct device *dev) cfg.common.hw_max_vasz_lg2 = hv_iommu_device->max_iova_width; cfg.common.hw_max_oasz_lg2 = 52; + cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE); cfg.top_level = (hv_iommu_device->max_iova_width > 48) ? 
4 : 3; ret = pt_iommu_x86_64_init(&hv_domain->pt_iommu_x86_64, &cfg, GFP_KERNEL); diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 5bdbb44da112..eaaf87171478 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -496,6 +496,7 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ #define HVCALL_GET_GPA_PAGES_ACCESS_STATES 0x00c9 #define HVCALL_CONFIGURE_DEVICE_DOMAIN 0x00ce #define HVCALL_FLUSH_DEVICE_DOMAIN 0x00d0 +#define HVCALL_FLUSH_DEVICE_DOMAIN_LIST 0x00d1 #define HVCALL_ACQUIRE_SPARSE_SPA_PAGE_HOST_ACCESS 0x00d7 #define HVCALL_RELEASE_SPARSE_SPA_PAGE_HOST_ACCESS 0x00d8 #define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h index 493608e791b4..f51d5d9467f1 100644 --- a/include/hyperv/hvhdk_mini.h +++ b/include/hyperv/hvhdk_mini.h @@ -671,4 +671,21 @@ struct hv_input_flush_device_domain { u32 reserved; } __packed; +union hv_iommu_flush_va { + u64 iova; + struct { + u64 page_mask_shift : 12; + u64 page_number : 52; + }; +} __packed; + + +struct hv_input_flush_device_domain_list { + struct hv_input_device_domain device_domain; +#define HV_FLUSH_DEVICE_DOMAIN_LIST_IOMMU_FORMAT (1 << 0) + u32 flags; + u32 reserved; + union hv_iommu_flush_va iova_list[]; +} __packed; + #endif /* _HV_HVHDK_MINI_H */ -- 2.52.0

