gvt: GVTg support ppgtt pvmmio optimization

Zhang, Xiaolin Sun, 14 Oct 2018 19:39:23 -0700

On 10/11/2018 04:07 PM, Zhao, Yakui wrote:
>
> On 2018年10月11日 14:14, Xiaolin Zhang wrote:
>> This patch handles ppgtt update from g2v notification.
>>
>> It reads out ppgtt pte entries from the guest pte table pages and
>> converts them to host pfns.
>>
>> It creates local ppgtt tables and inserts the content pages
>> into the local ppgtt tables directly, which does not track
>> the usage of the guest page table and removes the cost of write
>> protection from the original shadow page mechanism.
> It is possible that Guest VGPU writes the ppgtt entry by using 2M/64K 
> page mode.
>
> If so, the gvtg should also handle it in PVMMIO mode.
It is possible for the guest vGPU to use huge page mode. Currently this
is a gap in pvppgtt, since the feature is only valid for non-huge-page
mode. Support for guest huge page mode is work in progress.
BRs, Xiaolin
>> v1: rebase
>> v0: RFC
>>
>> Signed-off-by: Xiaolin Zhang <[email protected]>
>> ---
>>   drivers/gpu/drm/i915/gvt/gtt.c      | 318 
>> ++++++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/gvt/gtt.h      |   9 +
>>   drivers/gpu/drm/i915/gvt/handlers.c |  13 +-
>>   3 files changed, 338 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
>> index 58e166e..8d3e21a 100644
>> --- a/drivers/gpu/drm/i915/gvt/gtt.c
>> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
>> @@ -1744,6 +1744,26 @@ static int ppgtt_handle_guest_write_page_table_bytes(
>>      return 0;
>>   }
>>   
>> +static void invalidate_mm_pv(struct intel_vgpu_mm *mm)
>> +{
>> +    struct intel_vgpu *vgpu = mm->vgpu;
>> +    struct intel_gvt *gvt = vgpu->gvt;
>> +    struct intel_gvt_gtt *gtt = &gvt->gtt;
>> +    struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
>> +    struct intel_gvt_gtt_entry se;
>> +
>> +    i915_ppgtt_close(&mm->ppgtt->vm);
>> +    i915_ppgtt_put(mm->ppgtt);
>> +
>> +    ppgtt_get_shadow_root_entry(mm, &se, 0);
>> +    if (!ops->test_present(&se))
>> +            return;
>> +    se.val64 = 0;
>> +    ppgtt_set_shadow_root_entry(mm, &se, 0);
>> +
>> +    mm->ppgtt_mm.shadowed  = false;
>> +}
>> +
>>   static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
>>   {
>>      struct intel_vgpu *vgpu = mm->vgpu;
>> @@ -1756,6 +1776,11 @@ static void invalidate_ppgtt_mm(struct intel_vgpu_mm 
>> *mm)
>>      if (!mm->ppgtt_mm.shadowed)
>>              return;
>>   
>> +    if (VGPU_PVMMIO(mm->vgpu) & PVMMIO_PPGTT_UPDATE) {
>> +            invalidate_mm_pv(mm);
>> +            return;
>> +    }
>> +
>>      for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
>>              ppgtt_get_shadow_root_entry(mm, &se, index);
>>   
>> @@ -1773,6 +1798,26 @@ static void invalidate_ppgtt_mm(struct intel_vgpu_mm 
>> *mm)
>>      mm->ppgtt_mm.shadowed = false;
>>   }
>>   
>> +static int shadow_mm_pv(struct intel_vgpu_mm *mm)
>> +{
>> +    struct intel_vgpu *vgpu = mm->vgpu;
>> +    struct intel_gvt *gvt = vgpu->gvt;
>> +    struct intel_gvt_gtt_entry se;
>> +
>> +    mm->ppgtt = i915_ppgtt_create(gvt->dev_priv, NULL);
>> +    if (IS_ERR(mm->ppgtt)) {
>> +            gvt_vgpu_err("fail to create ppgtt for pdp 0x%llx\n",
>> +                            px_dma(&mm->ppgtt->pml4));
>> +            return PTR_ERR(mm->ppgtt);
>> +    }
>> +
>> +    se.type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
>> +    se.val64 = px_dma(&mm->ppgtt->pml4);
>> +    ppgtt_set_shadow_root_entry(mm, &se, 0);
>> +    mm->ppgtt_mm.shadowed  = true;
>> +
>> +    return 0;
>> +}
>>   
>>   static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
>>   {
>> @@ -1787,6 +1832,9 @@ static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
>>      if (mm->ppgtt_mm.shadowed)
>>              return 0;
>>   
>> +    if (VGPU_PVMMIO(mm->vgpu) & PVMMIO_PPGTT_UPDATE)
>> +            return shadow_mm_pv(mm);
>> +
>>      mm->ppgtt_mm.shadowed = true;
>>   
>>      for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
>> @@ -2767,3 +2815,273 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
>>      intel_vgpu_destroy_all_ppgtt_mm(vgpu);
>>      intel_vgpu_reset_ggtt(vgpu, true);
>>   }
>> +
>> +int intel_vgpu_g2v_pv_ppgtt_alloc_4lvl(struct intel_vgpu *vgpu,
>> +            u64 pdps[])
>> +{
>> +    struct intel_vgpu_mm *mm;
>> +    int ret = 0;
>> +    u32 offset;
>> +    struct pv_ppgtt_update pv_ppgtt;
>> +
>> +    offset = offsetof(struct gvt_shared_page, pv_ppgtt);
>> +    intel_gvt_read_shared_page(vgpu, offset, &pv_ppgtt, sizeof(pv_ppgtt));
>> +
>> +    mm = intel_vgpu_find_ppgtt_mm(vgpu, &pv_ppgtt.pdp);
>> +    if (!mm) {
>> +            gvt_vgpu_err("failed to find pdp 0x%llx\n", pv_ppgtt.pdp);
>> +            ret = -EINVAL;
>> +    } else {
>> +            ret = mm->ppgtt->vm.allocate_va_range(&mm->ppgtt->vm,
>> +                    pv_ppgtt.start, pv_ppgtt.length);
>> +            if (ret)
>> +                    gvt_vgpu_err("failed to alloc %llx\n", pv_ppgtt.pdp);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +int intel_vgpu_g2v_pv_ppgtt_clear_4lvl(struct intel_vgpu *vgpu,
>> +            u64 pdps[])
>> +{
>> +    struct intel_vgpu_mm *mm;
>> +    int ret = 0;
>> +    u32 offset;
>> +    struct pv_ppgtt_update pv_ppgtt;
>> +
>> +    offset = offsetof(struct gvt_shared_page, pv_ppgtt);
>> +    intel_gvt_read_shared_page(vgpu, offset, &pv_ppgtt, sizeof(pv_ppgtt));
>> +    mm = intel_vgpu_find_ppgtt_mm(vgpu, &pv_ppgtt.pdp);
>> +    if (!mm) {
>> +            gvt_vgpu_err("failed to find pdp 0x%llx\n", pv_ppgtt.pdp);
>> +            ret = -EINVAL;
>> +    } else {
>> +            mm->ppgtt->vm.clear_range(&mm->ppgtt->vm,
>> +                    pv_ppgtt.start, pv_ppgtt.length);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +#define GEN8_PML4E_SIZE             (1UL << GEN8_PML4E_SHIFT)
>> +#define GEN8_PML4E_SIZE_MASK        (~(GEN8_PML4E_SIZE - 1))
>> +#define GEN8_PDPE_SIZE              (1UL << GEN8_PDPE_SHIFT)
>> +#define GEN8_PDPE_SIZE_MASK (~(GEN8_PDPE_SIZE - 1))
>> +#define GEN8_PDE_SIZE               (1UL << GEN8_PDE_SHIFT)
>> +#define GEN8_PDE_SIZE_MASK  (~(GEN8_PDE_SIZE - 1))
>> +
>> +#define pml4_addr_end(addr, end)                                    \
>> +({  unsigned long __boundary = \
>> +                    ((addr) + GEN8_PML4E_SIZE) & GEN8_PML4E_SIZE_MASK; \
>> +    (__boundary < (end)) ? __boundary : (end);              \
>> +})
>> +
>> +#define pdp_addr_end(addr, end)                                             
>> \
>> +({  unsigned long __boundary = \
>> +                    ((addr) + GEN8_PDPE_SIZE) & GEN8_PDPE_SIZE_MASK; \
>> +    (__boundary < (end)) ? __boundary : (end);              \
>> +})
>> +
>> +#define pd_addr_end(addr, end)                                              
>> \
>> +({  unsigned long __boundary = \
>> +                    ((addr) + GEN8_PDE_SIZE) & GEN8_PDE_SIZE_MASK;  \
>> +    (__boundary < (end)) ? __boundary : (end);              \
>> +})
>> +
>> +struct ppgtt_walk {
>> +    unsigned long *mfns;
>> +    int mfn_index;
>> +    unsigned long *pt;
>> +};
>> +
>> +static int walk_pt_range(struct intel_vgpu *vgpu, u64 pt,
>> +                            u64 start, u64 end, struct ppgtt_walk *walk)
>> +{
>> +    const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
>> +    struct intel_gvt_gtt_gma_ops *gma_ops = vgpu->gvt->gtt.gma_ops;
>> +    unsigned long start_index, end_index;
>> +    int ret;
>> +    int i;
>> +    unsigned long mfn, gfn;
>> +
>> +    start_index = gma_ops->gma_to_pte_index(start);
>> +    end_index = ((end - start) >> PAGE_SHIFT) + start_index;
>> +
>> +    ret = intel_gvt_hypervisor_read_gpa(vgpu,
>> +            (pt & PAGE_MASK) + (start_index << info->gtt_entry_size_shift),
>> +            walk->pt + start_index,
>> +            (end_index - start_index) << info->gtt_entry_size_shift);
>> +    if (ret) {
>> +            gvt_vgpu_err("fail to read gpa %llx\n", pt);
>> +            return ret;
>> +    }
>> +
>> +    for (i = start_index; i < end_index; i++) {
>> +            gfn = walk->pt[i] >> PAGE_SHIFT;
>> +            mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
>> +            if (mfn == INTEL_GVT_INVALID_ADDR) {
>> +                    gvt_vgpu_err("fail to translate gfn: 0x%lx\n", gfn);
>> +                    return -ENXIO;
>> +            }
>> +            walk->mfns[walk->mfn_index++] = mfn << PAGE_SHIFT;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +
>> +static int walk_pd_range(struct intel_vgpu *vgpu, u64 pd,
>> +                            u64 start, u64 end, struct ppgtt_walk *walk)
>> +{
>> +    const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
>> +    struct intel_gvt_gtt_gma_ops *gma_ops = vgpu->gvt->gtt.gma_ops;
>> +    unsigned long index;
>> +    u64 pt, next;
>> +    int ret  = 0;
>> +
>> +    do {
>> +            index = gma_ops->gma_to_pde_index(start);
>> +
>> +            ret = intel_gvt_hypervisor_read_gpa(vgpu,
>> +                    (pd & PAGE_MASK) + (index <<
>> +                    info->gtt_entry_size_shift), &pt, 8);
>> +            if (ret)
>> +                    return ret;
>> +            next = pd_addr_end(start, end);
>> +            walk_pt_range(vgpu, pt, start, next, walk);
>> +
>> +            start = next;
>> +    } while (start != end);
>> +
>> +    return ret;
>> +}
>> +
>> +
>> +static int walk_pdp_range(struct intel_vgpu *vgpu, u64 pdp,
>> +                              u64 start, u64 end, struct ppgtt_walk *walk)
>> +{
>> +    const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
>> +    struct intel_gvt_gtt_gma_ops *gma_ops = vgpu->gvt->gtt.gma_ops;
>> +    unsigned long index;
>> +    u64 pd, next;
>> +    int ret  = 0;
>> +
>> +    do {
>> +            index = gma_ops->gma_to_l4_pdp_index(start);
>> +
>> +            ret = intel_gvt_hypervisor_read_gpa(vgpu,
>> +                    (pdp & PAGE_MASK) + (index <<
>> +                    info->gtt_entry_size_shift), &pd, 8);
>> +            if (ret)
>> +                    return ret;
>> +            next = pdp_addr_end(start, end);
>> +            walk_pd_range(vgpu, pd, start, next, walk);
>> +            start = next;
>> +    } while (start != end);
>> +
>> +    return ret;
>> +}
>> +
>> +
>> +static int walk_pml4_range(struct intel_vgpu *vgpu, u64 pml4,
>> +                            u64 start, u64 end, struct ppgtt_walk *walk)
>> +{
>> +    const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
>> +    struct intel_gvt_gtt_gma_ops *gma_ops = vgpu->gvt->gtt.gma_ops;
>> +    unsigned long index;
>> +    u64 pdp, next;
>> +    int ret  = 0;
>> +
>> +    do {
>> +            index = gma_ops->gma_to_pml4_index(start);
>> +            ret = intel_gvt_hypervisor_read_gpa(vgpu,
>> +                    (pml4 & PAGE_MASK) + (index <<
>> +                    info->gtt_entry_size_shift), &pdp, 8);
>> +            if (ret)
>> +                    return ret;
>> +            next = pml4_addr_end(start, end);
>> +            walk_pdp_range(vgpu, pdp, start, next, walk);
>> +            start = next;
>> +    } while (start != end);
>> +
>> +    return ret;
>> +}
>> +
>> +int intel_vgpu_g2v_pv_ppgtt_insert_4lvl(struct intel_vgpu *vgpu,
>> +            u64 pdps[])
>> +{
>> +    struct intel_vgpu_mm *mm;
>> +    u64 pml4, start, length;
>> +    u32 cache_level;
>> +    int ret = 0;
>> +    struct sg_table st;
>> +    struct scatterlist *sg = NULL;
>> +    int num_pages;
>> +    struct i915_vma vma;
>> +    struct ppgtt_walk walk;
>> +    int i;
>> +    u32 offset;
>> +    struct pv_ppgtt_update pv_ppgtt;
>> +
>> +    offset = offsetof(struct gvt_shared_page, pv_ppgtt);
>> +    intel_gvt_read_shared_page(vgpu, offset, &pv_ppgtt, sizeof(pv_ppgtt));
>> +    pml4 = pv_ppgtt.pdp;
>> +    start = pv_ppgtt.start;
>> +    length = pv_ppgtt.length;
>> +    cache_level = pv_ppgtt.cache_level;
>> +    num_pages = length >> PAGE_SHIFT;
>> +
>> +    mm = intel_vgpu_find_ppgtt_mm(vgpu, &pml4);
>> +    if (!mm) {
>> +            gvt_vgpu_err("fail to find mm for pml4 0x%llx\n", pml4);
>> +            return -EINVAL;
>> +    }
>> +
>> +    walk.mfn_index = 0;
>> +    walk.mfns = NULL;
>> +    walk.pt = NULL;
>> +
>> +    walk.mfns = kmalloc_array(num_pages,
>> +                    sizeof(unsigned long), GFP_KERNEL);
>> +    if (!walk.mfns) {
>> +            ret = -ENOMEM;
>> +            goto fail;
>> +    }
>> +
>> +    walk.pt = (unsigned long *)__get_free_pages(GFP_KERNEL, 0);
>> +    if (!walk.pt) {
>> +            ret = -ENOMEM;
>> +            goto fail;
>> +    }
>> +
>> +    if (sg_alloc_table(&st, num_pages, GFP_KERNEL)) {
>> +            ret = -ENOMEM;
>> +            goto fail;
>> +    }
>> +
>> +    ret = walk_pml4_range(vgpu, pml4, start, start + length, &walk);
>> +    if (ret)
>> +            goto fail_free_sg;
>> +
>> +    WARN_ON(num_pages != walk.mfn_index);
>> +
>> +    for_each_sg(st.sgl, sg, num_pages, i) {
>> +            sg->offset = 0;
>> +            sg->length = PAGE_SIZE;
>> +            sg_dma_address(sg) = walk.mfns[i];
>> +            sg_dma_len(sg) = PAGE_SIZE;
>> +    }
>> +
>> +    memset(&vma, 0, sizeof(vma));
>> +    vma.node.start = start;
>> +    vma.pages = &st;
>> +    mm->ppgtt->vm.insert_entries(&mm->ppgtt->vm, &vma, cache_level, 0);
>> +
>> +fail_free_sg:
>> +    sg_free_table(&st);
>> +fail:
>> +    kfree(walk.mfns);
>> +    free_page((unsigned long)walk.pt);
>> +
>> +    return ret;
>> +}
>> diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
>> index a11bfee..4edaed9 100644
>> --- a/drivers/gpu/drm/i915/gvt/gtt.h
>> +++ b/drivers/gpu/drm/i915/gvt/gtt.h
>> @@ -141,6 +141,7 @@ struct intel_gvt_partial_pte {
>>   
>>   struct intel_vgpu_mm {
>>      enum intel_gvt_mm_type type;
>> +    struct i915_hw_ppgtt *ppgtt;
>>      struct intel_vgpu *vgpu;
>>   
>>      struct kref ref;
>> @@ -277,4 +278,12 @@ int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu 
>> *vgpu,
>>   int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
>>      unsigned int off, void *p_data, unsigned int bytes);
>>   
>> +int intel_vgpu_g2v_pv_ppgtt_alloc_4lvl(struct intel_vgpu *vgpu,
>> +            u64 pdps[]);
>> +
>> +int intel_vgpu_g2v_pv_ppgtt_clear_4lvl(struct intel_vgpu *vgpu,
>> +            u64 pdps[]);
>> +
>> +int intel_vgpu_g2v_pv_ppgtt_insert_4lvl(struct intel_vgpu *vgpu,
>> +            u64 pdps[]);
>>   #endif /* _GVT_GTT_H_ */
>> diff --git a/drivers/gpu/drm/i915/gvt/handlers.c 
>> b/drivers/gpu/drm/i915/gvt/handlers.c
>> index 7a53011..1ae21cb 100644
>> --- a/drivers/gpu/drm/i915/gvt/handlers.c
>> +++ b/drivers/gpu/drm/i915/gvt/handlers.c
>> @@ -1186,7 +1186,7 @@ static int handle_g2v_notification(struct intel_vgpu 
>> *vgpu, int notification)
>>      intel_gvt_gtt_type_t root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
>>      struct intel_vgpu_mm *mm;
>>      u64 *pdps;
>> -
>> +    int ret = 0;
>>      pdps = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0]));
>>   
>>      switch (notification) {
>> @@ -1199,6 +1199,15 @@ static int handle_g2v_notification(struct intel_vgpu 
>> *vgpu, int notification)
>>      case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY:
>>      case VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY:
>>              return intel_vgpu_put_ppgtt_mm(vgpu, pdps);
>> +    case VGT_G2V_PPGTT_L4_ALLOC:
>> +            ret = intel_vgpu_g2v_pv_ppgtt_alloc_4lvl(vgpu, pdps);
>> +                    break;
>> +    case VGT_G2V_PPGTT_L4_INSERT:
>> +            ret = intel_vgpu_g2v_pv_ppgtt_insert_4lvl(vgpu, pdps);
>> +            break;
>> +    case VGT_G2V_PPGTT_L4_CLEAR:
>> +            ret = intel_vgpu_g2v_pv_ppgtt_clear_4lvl(vgpu, pdps);
>> +            break;
>>      case VGT_G2V_EXECLIST_CONTEXT_CREATE:
>>      case VGT_G2V_EXECLIST_CONTEXT_DESTROY:
>>      case 1: /* Remove this in guest driver. */
>> @@ -1206,7 +1215,7 @@ static int handle_g2v_notification(struct intel_vgpu 
>> *vgpu, int notification)
>>      default:
>>              gvt_vgpu_err("Invalid PV notification %d\n", notification);
>>      }
>> -    return 0;
>> +    return ret;
>>   }
>>   
>>   static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready)
>>


_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [v1 10/10] drm/i915/gvt: GVTg support ppgtt pvmmio optimization

Reply via email to