Chris Wilson <[email protected]> writes:

> We flush the entire page every time we update a few bytes, making the
> update of a page table many, many times slower than is required. If we
> create a WC map of the page for our updates, we can avoid the clflush
> but incur additional cost for creating the pagetable. We amortize that
> cost by reusing page vmappings, and only changing the page protection in
> batches.
>
> Signed-off-by: Chris Wilson <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 329 
> ++++++++++++++++++------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   5 +
>  2 files changed, 172 insertions(+), 162 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 45bab7b7b026..302aee193ce5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -340,43 +340,69 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
>       return pte;
>  }
>  
> -static int __setup_page_dma(struct drm_i915_private *dev_priv,
> -                         struct i915_page_dma *p, gfp_t flags)
> +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
>  {
> -     struct device *kdev = &dev_priv->drm.pdev->dev;
> +     struct page *page;
>  
> -     p->page = alloc_page(flags);
> -     if (!p->page)
> -             return -ENOMEM;
> +     if (vm->free_pages.nr)
> +             return vm->free_pages.pages[--vm->free_pages.nr];
>  
> -     p->daddr = dma_map_page(kdev,
> -                             p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
> +     page = alloc_page(gfp);
> +     if (!page)
> +             return NULL;
>  
> -     if (dma_mapping_error(kdev, p->daddr)) {
> -             __free_page(p->page);
> -             return -EINVAL;
> +     if (vm->pt_kmap_wc)
> +             set_pages_array_wc(&page, 1);
> +
> +     return page;
> +}
> +
> +static void vm_free_pages_release(struct i915_address_space *vm)
> +{
> +     GEM_BUG_ON(!pagevec_count(&vm->free_pages));
> +
> +     if (vm->pt_kmap_wc)
> +             set_pages_array_wb(vm->free_pages.pages,
> +                                pagevec_count(&vm->free_pages));
> +
> +     __pagevec_release(&vm->free_pages);
> +}
> +
> +static void vm_free_page(struct i915_address_space *vm, struct page *page)
> +{
> +     if (!pagevec_add(&vm->free_pages, page))
> +             vm_free_pages_release(vm);

What about the page you failed to push to the vec?

To me it seems that you are missing a retry after
the vec cleanup.

-Mika

> +}
> +
> +static int __setup_page_dma(struct i915_address_space *vm,
> +                         struct i915_page_dma *p,
> +                         gfp_t gfp)
> +{
> +     p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
> +     if (unlikely(!p->page))
> +             return -ENOMEM;
> +
> +     p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
> +                             PCI_DMA_BIDIRECTIONAL);
> +     if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
> +             vm_free_page(vm, p->page);
> +             return -ENOMEM;
>       }
>  
>       return 0;
>  }
>  
> -static int setup_page_dma(struct drm_i915_private *dev_priv,
> +static int setup_page_dma(struct i915_address_space *vm,
>                         struct i915_page_dma *p)
>  {
> -     return __setup_page_dma(dev_priv, p, I915_GFP_DMA);
> +     return __setup_page_dma(vm, p, I915_GFP_DMA);
>  }
>  
> -static void cleanup_page_dma(struct drm_i915_private *dev_priv,
> +static void cleanup_page_dma(struct i915_address_space *vm,
>                            struct i915_page_dma *p)
>  {
> -     struct pci_dev *pdev = dev_priv->drm.pdev;
> -
> -     if (WARN_ON(!p->page))
> -             return;
> -
> -     dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
> -     __free_page(p->page);
> -     memset(p, 0, sizeof(*p));
> +     dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
> +     vm_free_page(vm, p->page);
>  }
>  
>  static void *kmap_page_dma(struct i915_page_dma *p)
> @@ -387,67 +413,54 @@ static void *kmap_page_dma(struct i915_page_dma *p)
>  /* We use the flushing unmap only with ppgtt structures:
>   * page directories, page tables and scratch pages.
>   */
> -static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr)
> +static void kunmap_page_dma(void *vaddr)
>  {
> -     /* There are only few exceptions for gen >=6. chv and bxt.
> -      * And we are not sure about the latter so play safe for now.
> -      */
> -     if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
> -             drm_clflush_virt_range(vaddr, PAGE_SIZE);
> -
>       kunmap_atomic(vaddr);
>  }
>  
>  #define kmap_px(px) kmap_page_dma(px_base(px))
> -#define kunmap_px(ppgtt, vaddr) \
> -             kunmap_page_dma((ppgtt)->base.i915, (vaddr))
> +#define kunmap_px(vaddr) kunmap_page_dma((vaddr))
>  
> -#define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px))
> -#define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px))
> -#define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v))
> -#define fill32_px(dev_priv, px, v) \
> -             fill_page_dma_32((dev_priv), px_base(px), (v))
> +#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
> +#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
> +#define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v))
> +#define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v))
>  
> -static void fill_page_dma(struct drm_i915_private *dev_priv,
> -                       struct i915_page_dma *p, const uint64_t val)
> +static void fill_page_dma(struct i915_address_space *vm,
> +                       struct i915_page_dma *p,
> +                       const u64 val)
>  {
> +     u64 * const vaddr = kmap_page_dma(p);
>       int i;
> -     uint64_t * const vaddr = kmap_page_dma(p);
>  
>       for (i = 0; i < 512; i++)
>               vaddr[i] = val;
>  
> -     kunmap_page_dma(dev_priv, vaddr);
> +     kunmap_page_dma(vaddr);
>  }
>  
> -static void fill_page_dma_32(struct drm_i915_private *dev_priv,
> -                          struct i915_page_dma *p, const uint32_t val32)
> +static void fill_page_dma_32(struct i915_address_space *vm,
> +                          struct i915_page_dma *p,
> +                          const u32 v)
>  {
> -     uint64_t v = val32;
> -
> -     v = v << 32 | val32;
> -
> -     fill_page_dma(dev_priv, p, v);
> +     fill_page_dma(vm, p, (u64)v << 32 | v);
>  }
>  
>  static int
> -setup_scratch_page(struct drm_i915_private *dev_priv,
> -                struct i915_page_dma *scratch,
> -                gfp_t gfp)
> +setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
>  {
> -     return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO);
> +     return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO);
>  }
>  
> -static void cleanup_scratch_page(struct drm_i915_private *dev_priv,
> -                              struct i915_page_dma *scratch)
> +static void cleanup_scratch_page(struct i915_address_space *vm)
>  {
> -     cleanup_page_dma(dev_priv, scratch);
> +     cleanup_page_dma(vm, &vm->scratch_page);
>  }
>  
> -static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
> +static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>  {
>       struct i915_page_table *pt;
> -     const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES;
> +     const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES;
>       int ret = -ENOMEM;
>  
>       pt = kzalloc(sizeof(*pt), GFP_KERNEL);
> @@ -460,7 +473,7 @@ static struct i915_page_table *alloc_pt(struct 
> drm_i915_private *dev_priv)
>       if (!pt->used_ptes)
>               goto fail_bitmap;
>  
> -     ret = setup_px(dev_priv, pt);
> +     ret = setup_px(vm, pt);
>       if (ret)
>               goto fail_page_m;
>  
> @@ -474,10 +487,9 @@ static struct i915_page_table *alloc_pt(struct 
> drm_i915_private *dev_priv)
>       return ERR_PTR(ret);
>  }
>  
> -static void free_pt(struct drm_i915_private *dev_priv,
> -                 struct i915_page_table *pt)
> +static void free_pt(struct i915_address_space *vm, struct i915_page_table 
> *pt)
>  {
> -     cleanup_px(dev_priv, pt);
> +     cleanup_px(vm, pt);
>       kfree(pt->used_ptes);
>       kfree(pt);
>  }
> @@ -490,7 +502,7 @@ static void gen8_initialize_pt(struct i915_address_space 
> *vm,
>       scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
>                                     I915_CACHE_LLC);
>  
> -     fill_px(vm->i915, pt, scratch_pte);
> +     fill_px(vm, pt, scratch_pte);
>  }
>  
>  static void gen6_initialize_pt(struct i915_address_space *vm,
> @@ -503,10 +515,10 @@ static void gen6_initialize_pt(struct 
> i915_address_space *vm,
>       scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
>                                    I915_CACHE_LLC, 0);
>  
> -     fill32_px(vm->i915, pt, scratch_pte);
> +     fill32_px(vm, pt, scratch_pte);
>  }
>  
> -static struct i915_page_directory *alloc_pd(struct drm_i915_private 
> *dev_priv)
> +static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
>  {
>       struct i915_page_directory *pd;
>       int ret = -ENOMEM;
> @@ -520,7 +532,7 @@ static struct i915_page_directory *alloc_pd(struct 
> drm_i915_private *dev_priv)
>       if (!pd->used_pdes)
>               goto fail_bitmap;
>  
> -     ret = setup_px(dev_priv, pd);
> +     ret = setup_px(vm, pd);
>       if (ret)
>               goto fail_page_m;
>  
> @@ -534,11 +546,11 @@ static struct i915_page_directory *alloc_pd(struct 
> drm_i915_private *dev_priv)
>       return ERR_PTR(ret);
>  }
>  
> -static void free_pd(struct drm_i915_private *dev_priv,
> +static void free_pd(struct i915_address_space *vm,
>                   struct i915_page_directory *pd)
>  {
>       if (px_page(pd)) {
> -             cleanup_px(dev_priv, pd);
> +             cleanup_px(vm, pd);
>               kfree(pd->used_pdes);
>               kfree(pd);
>       }
> @@ -551,7 +563,7 @@ static void gen8_initialize_pd(struct i915_address_space 
> *vm,
>  
>       scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
>  
> -     fill_px(vm->i915, pd, scratch_pde);
> +     fill_px(vm, pd, scratch_pde);
>  }
>  
>  static int __pdp_init(struct drm_i915_private *dev_priv,
> @@ -585,23 +597,23 @@ static void __pdp_fini(struct 
> i915_page_directory_pointer *pdp)
>       pdp->page_directory = NULL;
>  }
>  
> -static struct
> -i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv)
> +static struct i915_page_directory_pointer *
> +alloc_pdp(struct i915_address_space *vm)
>  {
>       struct i915_page_directory_pointer *pdp;
>       int ret = -ENOMEM;
>  
> -     WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv));
> +     WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915));
>  
>       pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
>       if (!pdp)
>               return ERR_PTR(-ENOMEM);
>  
> -     ret = __pdp_init(dev_priv, pdp);
> +     ret = __pdp_init(vm->i915, pdp);
>       if (ret)
>               goto fail_bitmap;
>  
> -     ret = setup_px(dev_priv, pdp);
> +     ret = setup_px(vm, pdp);
>       if (ret)
>               goto fail_page_m;
>  
> @@ -615,12 +627,12 @@ i915_page_directory_pointer *alloc_pdp(struct 
> drm_i915_private *dev_priv)
>       return ERR_PTR(ret);
>  }
>  
> -static void free_pdp(struct drm_i915_private *dev_priv,
> +static void free_pdp(struct i915_address_space *vm,
>                    struct i915_page_directory_pointer *pdp)
>  {
>       __pdp_fini(pdp);
> -     if (USES_FULL_48BIT_PPGTT(dev_priv)) {
> -             cleanup_px(dev_priv, pdp);
> +     if (USES_FULL_48BIT_PPGTT(vm->i915)) {
> +             cleanup_px(vm, pdp);
>               kfree(pdp);
>       }
>  }
> @@ -632,7 +644,7 @@ static void gen8_initialize_pdp(struct i915_address_space 
> *vm,
>  
>       scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
>  
> -     fill_px(vm->i915, pdp, scratch_pdpe);
> +     fill_px(vm, pdp, scratch_pdpe);
>  }
>  
>  static void gen8_initialize_pml4(struct i915_address_space *vm,
> @@ -643,7 +655,7 @@ static void gen8_initialize_pml4(struct 
> i915_address_space *vm,
>       scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
>                                         I915_CACHE_LLC);
>  
> -     fill_px(vm->i915, pml4, scratch_pml4e);
> +     fill_px(vm, pml4, scratch_pml4e);
>  }
>  
>  static void
> @@ -659,20 +671,18 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt,
>  
>       page_directorypo = kmap_px(pdp);
>       page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
> -     kunmap_px(ppgtt, page_directorypo);
> +     kunmap_px(page_directorypo);
>  }
>  
>  static void
> -gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt,
> -              struct i915_pml4 *pml4,
> +gen8_setup_pml4e(struct i915_pml4 *pml4,
>                struct i915_page_directory_pointer *pdp,
>                int index)
>  {
>       gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
>  
> -     WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev)));
>       pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
> -     kunmap_px(ppgtt, pagemap);
> +     kunmap_px(pagemap);
>  }
>  
>  /* Broadwell Page Directory Pointer Descriptors */
> @@ -741,7 +751,6 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space 
> *vm,
>                               uint64_t start,
>                               uint64_t length)
>  {
> -     struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>       unsigned int num_entries = gen8_pte_count(start, length);
>       unsigned int pte = gen8_pte_index(start);
>       unsigned int pte_end = pte + num_entries;
> @@ -762,7 +771,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space 
> *vm,
>       vaddr = kmap_px(pt);
>       while (pte < pte_end)
>               vaddr[pte++] = scratch_pte;
> -     kunmap_px(ppgtt, vaddr);
> +     kunmap_px(vaddr);
>  
>       return false;
>  }
> @@ -775,7 +784,6 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space 
> *vm,
>                               uint64_t start,
>                               uint64_t length)
>  {
> -     struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>       struct i915_page_table *pt;
>       uint64_t pde;
>       gen8_pde_t *pde_vaddr;
> @@ -790,8 +798,8 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space 
> *vm,
>                       __clear_bit(pde, pd->used_pdes);
>                       pde_vaddr = kmap_px(pd);
>                       pde_vaddr[pde] = scratch_pde;
> -                     kunmap_px(ppgtt, pde_vaddr);
> -                     free_pt(vm->i915, pt);
> +                     kunmap_px(pde_vaddr);
> +                     free_pt(vm, pt);
>               }
>       }
>  
> @@ -820,7 +828,7 @@ static bool gen8_ppgtt_clear_pdp(struct 
> i915_address_space *vm,
>               if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
>                       __clear_bit(pdpe, pdp->used_pdpes);
>                       gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);
> -                     free_pd(vm->i915, pd);
> +                     free_pd(vm, pd);
>               }
>       }
>  
> @@ -841,7 +849,6 @@ static void gen8_ppgtt_clear_pml4(struct 
> i915_address_space *vm,
>                                 uint64_t start,
>                                 uint64_t length)
>  {
> -     struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>       struct i915_page_directory_pointer *pdp;
>       uint64_t pml4e;
>  
> @@ -853,8 +860,8 @@ static void gen8_ppgtt_clear_pml4(struct 
> i915_address_space *vm,
>  
>               if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
>                       __clear_bit(pml4e, pml4->used_pml4es);
> -                     gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e);
> -                     free_pdp(vm->i915, pdp);
> +                     gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e);
> +                     free_pdp(vm, pdp);
>               }
>       }
>  }
> @@ -912,12 +919,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt 
> *ppgtt,
>                               pde = 0;
>                       }
>  
> -                     kunmap_px(ppgtt, vaddr);
> +                     kunmap_px(vaddr);
>                       vaddr = kmap_px(pd->page_table[pde]);
>                       pte = 0;
>               }
>       } while (1);
> -     kunmap_px(ppgtt, vaddr);
> +     kunmap_px(vaddr);
>  
>       return ret;
>  }
> @@ -959,7 +966,7 @@ static void gen8_ppgtt_insert_4lvl(struct 
> i915_address_space *vm,
>               ;
>  }
>  
> -static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
> +static void gen8_free_page_tables(struct i915_address_space *vm,
>                                 struct i915_page_directory *pd)
>  {
>       int i;
> @@ -971,34 +978,33 @@ static void gen8_free_page_tables(struct 
> drm_i915_private *dev_priv,
>               if (WARN_ON(!pd->page_table[i]))
>                       continue;
>  
> -             free_pt(dev_priv, pd->page_table[i]);
> +             free_pt(vm, pd->page_table[i]);
>               pd->page_table[i] = NULL;
>       }
>  }
>  
>  static int gen8_init_scratch(struct i915_address_space *vm)
>  {
> -     struct drm_i915_private *dev_priv = vm->i915;
>       int ret;
>  
> -     ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
> +     ret = setup_scratch_page(vm, I915_GFP_DMA);
>       if (ret)
>               return ret;
>  
> -     vm->scratch_pt = alloc_pt(dev_priv);
> +     vm->scratch_pt = alloc_pt(vm);
>       if (IS_ERR(vm->scratch_pt)) {
>               ret = PTR_ERR(vm->scratch_pt);
>               goto free_scratch_page;
>       }
>  
> -     vm->scratch_pd = alloc_pd(dev_priv);
> +     vm->scratch_pd = alloc_pd(vm);
>       if (IS_ERR(vm->scratch_pd)) {
>               ret = PTR_ERR(vm->scratch_pd);
>               goto free_pt;
>       }
>  
> -     if (USES_FULL_48BIT_PPGTT(dev_priv)) {
> -             vm->scratch_pdp = alloc_pdp(dev_priv);
> +     if (USES_FULL_48BIT_PPGTT(dev)) {
> +             vm->scratch_pdp = alloc_pdp(vm);
>               if (IS_ERR(vm->scratch_pdp)) {
>                       ret = PTR_ERR(vm->scratch_pdp);
>                       goto free_pd;
> @@ -1013,11 +1019,11 @@ static int gen8_init_scratch(struct 
> i915_address_space *vm)
>       return 0;
>  
>  free_pd:
> -     free_pd(dev_priv, vm->scratch_pd);
> +     free_pd(vm, vm->scratch_pd);
>  free_pt:
> -     free_pt(dev_priv, vm->scratch_pt);
> +     free_pt(vm, vm->scratch_pt);
>  free_scratch_page:
> -     cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +     cleanup_scratch_page(vm);
>  
>       return ret;
>  }
> @@ -1055,44 +1061,41 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt 
> *ppgtt, bool create)
>  
>  static void gen8_free_scratch(struct i915_address_space *vm)
>  {
> -     struct drm_i915_private *dev_priv = vm->i915;
> -
> -     if (USES_FULL_48BIT_PPGTT(dev_priv))
> -             free_pdp(dev_priv, vm->scratch_pdp);
> -     free_pd(dev_priv, vm->scratch_pd);
> -     free_pt(dev_priv, vm->scratch_pt);
> -     cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +     if (USES_FULL_48BIT_PPGTT(vm->i915))
> +             free_pdp(vm, vm->scratch_pdp);
> +     free_pd(vm, vm->scratch_pd);
> +     free_pt(vm, vm->scratch_pt);
> +     cleanup_scratch_page(vm);
>  }
>  
> -static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv,
> +static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>                                   struct i915_page_directory_pointer *pdp)
>  {
>       int i;
>  
> -     for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) {
> +     for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
>               if (WARN_ON(!pdp->page_directory[i]))
>                       continue;
>  
> -             gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
> -             free_pd(dev_priv, pdp->page_directory[i]);
> +             gen8_free_page_tables(vm, pdp->page_directory[i]);
> +             free_pd(vm, pdp->page_directory[i]);
>       }
>  
> -     free_pdp(dev_priv, pdp);
> +     free_pdp(vm, pdp);
>  }
>  
>  static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
>  {
> -     struct drm_i915_private *dev_priv = ppgtt->base.i915;
>       int i;
>  
>       for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
>               if (WARN_ON(!ppgtt->pml4.pdps[i]))
>                       continue;
>  
> -             gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]);
> +             gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
>       }
>  
> -     cleanup_px(dev_priv, &ppgtt->pml4);
> +     cleanup_px(&ppgtt->base, &ppgtt->pml4);
>  }
>  
>  static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
> @@ -1103,8 +1106,8 @@ static void gen8_ppgtt_cleanup(struct 
> i915_address_space *vm)
>       if (intel_vgpu_active(dev_priv))
>               gen8_ppgtt_notify_vgt(ppgtt, false);
>  
> -     if (!USES_FULL_48BIT_PPGTT(dev_priv))
> -             gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp);
> +     if (!USES_FULL_48BIT_PPGTT(vm->i915))
> +             gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
>       else
>               gen8_ppgtt_cleanup_4lvl(ppgtt);
>  
> @@ -1135,7 +1138,6 @@ static int gen8_ppgtt_alloc_pagetabs(struct 
> i915_address_space *vm,
>                                    uint64_t length,
>                                    unsigned long *new_pts)
>  {
> -     struct drm_i915_private *dev_priv = vm->i915;
>       struct i915_page_table *pt;
>       uint32_t pde;
>  
> @@ -1147,7 +1149,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct 
> i915_address_space *vm,
>                       continue;
>               }
>  
> -             pt = alloc_pt(dev_priv);
> +             pt = alloc_pt(vm);
>               if (IS_ERR(pt))
>                       goto unwind_out;
>  
> @@ -1161,7 +1163,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct 
> i915_address_space *vm,
>  
>  unwind_out:
>       for_each_set_bit(pde, new_pts, I915_PDES)
> -             free_pt(dev_priv, pd->page_table[pde]);
> +             free_pt(vm, pd->page_table[pde]);
>  
>       return -ENOMEM;
>  }
> @@ -1196,7 +1198,6 @@ gen8_ppgtt_alloc_page_directories(struct 
> i915_address_space *vm,
>                                 uint64_t length,
>                                 unsigned long *new_pds)
>  {
> -     struct drm_i915_private *dev_priv = vm->i915;
>       struct i915_page_directory *pd;
>       uint32_t pdpe;
>       uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
> @@ -1207,7 +1208,7 @@ gen8_ppgtt_alloc_page_directories(struct 
> i915_address_space *vm,
>               if (test_bit(pdpe, pdp->used_pdpes))
>                       continue;
>  
> -             pd = alloc_pd(dev_priv);
> +             pd = alloc_pd(vm);
>               if (IS_ERR(pd))
>                       goto unwind_out;
>  
> @@ -1221,7 +1222,7 @@ gen8_ppgtt_alloc_page_directories(struct 
> i915_address_space *vm,
>  
>  unwind_out:
>       for_each_set_bit(pdpe, new_pds, pdpes)
> -             free_pd(dev_priv, pdp->page_directory[pdpe]);
> +             free_pd(vm, pdp->page_directory[pdpe]);
>  
>       return -ENOMEM;
>  }
> @@ -1249,7 +1250,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct 
> i915_address_space *vm,
>                                 uint64_t length,
>                                 unsigned long *new_pdps)
>  {
> -     struct drm_i915_private *dev_priv = vm->i915;
>       struct i915_page_directory_pointer *pdp;
>       uint32_t pml4e;
>  
> @@ -1257,7 +1257,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct 
> i915_address_space *vm,
>  
>       gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
>               if (!test_bit(pml4e, pml4->used_pml4es)) {
> -                     pdp = alloc_pdp(dev_priv);
> +                     pdp = alloc_pdp(vm);
>                       if (IS_ERR(pdp))
>                               goto unwind_out;
>  
> @@ -1275,7 +1275,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct 
> i915_address_space *vm,
>  
>  unwind_out:
>       for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
> -             free_pdp(dev_priv, pml4->pdps[pml4e]);
> +             free_pdp(vm, pml4->pdps[pml4e]);
>  
>       return -ENOMEM;
>  }
> @@ -1324,7 +1324,6 @@ static int gen8_alloc_va_range_3lvl(struct 
> i915_address_space *vm,
>  {
>       struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>       unsigned long *new_page_dirs, *new_page_tables;
> -     struct drm_i915_private *dev_priv = vm->i915;
>       struct i915_page_directory *pd;
>       const uint64_t orig_start = start;
>       const uint64_t orig_length = length;
> @@ -1393,7 +1392,7 @@ static int gen8_alloc_va_range_3lvl(struct 
> i915_address_space *vm,
>                        * point we're still relying on insert_entries() */
>               }
>  
> -             kunmap_px(ppgtt, page_directory);
> +             kunmap_px(page_directory);
>               __set_bit(pdpe, pdp->used_pdpes);
>               gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
>       }
> @@ -1408,12 +1407,11 @@ static int gen8_alloc_va_range_3lvl(struct 
> i915_address_space *vm,
>  
>               for_each_set_bit(temp, new_page_tables + pdpe *
>                               BITS_TO_LONGS(I915_PDES), I915_PDES)
> -                     free_pt(dev_priv,
> -                             pdp->page_directory[pdpe]->page_table[temp]);
> +                     free_pt(vm, 
> pdp->page_directory[pdpe]->page_table[temp]);
>       }
>  
>       for_each_set_bit(pdpe, new_page_dirs, pdpes)
> -             free_pd(dev_priv, pdp->page_directory[pdpe]);
> +             free_pd(vm, pdp->page_directory[pdpe]);
>  
>       free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
>       mark_tlbs_dirty(ppgtt);
> @@ -1426,7 +1424,6 @@ static int gen8_alloc_va_range_4lvl(struct 
> i915_address_space *vm,
>                                   uint64_t length)
>  {
>       DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
> -     struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>       struct i915_page_directory_pointer *pdp;
>       uint64_t pml4e;
>       int ret = 0;
> @@ -1454,7 +1451,7 @@ static int gen8_alloc_va_range_4lvl(struct 
> i915_address_space *vm,
>               if (ret)
>                       goto err_out;
>  
> -             gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e);
> +             gen8_setup_pml4e(pml4, pdp, pml4e);
>       }
>  
>       bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
> @@ -1464,7 +1461,7 @@ static int gen8_alloc_va_range_4lvl(struct 
> i915_address_space *vm,
>  
>  err_out:
>       for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
> -             gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]);
> +             gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]);
>  
>       return ret;
>  }
> @@ -1480,7 +1477,8 @@ static int gen8_alloc_va_range(struct 
> i915_address_space *vm,
>               return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
>  }
>  
> -static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
> +static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
> +                       struct i915_page_directory_pointer *pdp,
>                         uint64_t start, uint64_t length,
>                         gen8_pte_t scratch_pte,
>                         struct seq_file *m)
> @@ -1546,7 +1544,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt 
> *ppgtt, struct seq_file *m)
>                                                I915_CACHE_LLC);
>  
>       if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
> -             gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
> +             gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, 
> m);
>       } else {
>               uint64_t pml4e;
>               struct i915_pml4 *pml4 = &ppgtt->pml4;
> @@ -1557,7 +1555,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt 
> *ppgtt, struct seq_file *m)
>                               continue;
>  
>                       seq_printf(m, "    PML4E #%llu\n", pml4e);
> -                     gen8_dump_pdp(pdp, start, length, scratch_pte, m);
> +                     gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, 
> m);
>               }
>       }
>  }
> @@ -1613,8 +1611,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>       ppgtt->base.bind_vma = ppgtt_bind_vma;
>       ppgtt->debug_dump = gen8_dump_ppgtt;
>  
> +     /* There are only few exceptions for gen >=6. chv and bxt.
> +      * And we are not sure about the latter so play safe for now.
> +      */
> +     if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
> +             ppgtt->base.pt_kmap_wc = true;
> +
>       if (USES_FULL_48BIT_PPGTT(dev_priv)) {
> -             ret = setup_px(dev_priv, &ppgtt->pml4);
> +             ret = setup_px(&ppgtt->base, &ppgtt->pml4);
>               if (ret)
>                       goto free_scratch;
>  
> @@ -1703,7 +1707,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt 
> *ppgtt, struct seq_file *m)
>                       }
>                       seq_puts(m, "\n");
>               }
> -             kunmap_px(ppgtt, pt_vaddr);
> +             kunmap_px(pt_vaddr);
>       }
>  }
>  
> @@ -1900,7 +1904,7 @@ static void gen6_ppgtt_clear_range(struct 
> i915_address_space *vm,
>               for (i = first_pte; i < last_pte; i++)
>                       pt_vaddr[i] = scratch_pte;
>  
> -             kunmap_px(ppgtt, pt_vaddr);
> +             kunmap_px(pt_vaddr);
>  
>               num_entries -= last_pte - first_pte;
>               first_pte = 0;
> @@ -1939,12 +1943,12 @@ static void gen6_ppgtt_insert_entries(struct 
> i915_address_space *vm,
>               }
>  
>               if (++act_pte == GEN6_PTES) {
> -                     kunmap_px(ppgtt, vaddr);
> +                     kunmap_px(vaddr);
>                       vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
>                       act_pte = 0;
>               }
>       } while (1);
> -     kunmap_px(ppgtt, vaddr);
> +     kunmap_px(vaddr);
>  }
>  
>  static int gen6_alloc_va_range(struct i915_address_space *vm,
> @@ -1978,7 +1982,7 @@ static int gen6_alloc_va_range(struct 
> i915_address_space *vm,
>               /* We've already allocated a page table */
>               WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
>  
> -             pt = alloc_pt(dev_priv);
> +             pt = alloc_pt(vm);
>               if (IS_ERR(pt)) {
>                       ret = PTR_ERR(pt);
>                       goto unwind_out;
> @@ -2026,7 +2030,7 @@ static int gen6_alloc_va_range(struct 
> i915_address_space *vm,
>               struct i915_page_table *pt = ppgtt->pd.page_table[pde];
>  
>               ppgtt->pd.page_table[pde] = vm->scratch_pt;
> -             free_pt(dev_priv, pt);
> +             free_pt(vm, pt);
>       }
>  
>       mark_tlbs_dirty(ppgtt);
> @@ -2035,16 +2039,15 @@ static int gen6_alloc_va_range(struct 
> i915_address_space *vm,
>  
>  static int gen6_init_scratch(struct i915_address_space *vm)
>  {
> -     struct drm_i915_private *dev_priv = vm->i915;
>       int ret;
>  
> -     ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
> +     ret = setup_scratch_page(vm, I915_GFP_DMA);
>       if (ret)
>               return ret;
>  
> -     vm->scratch_pt = alloc_pt(dev_priv);
> +     vm->scratch_pt = alloc_pt(vm);
>       if (IS_ERR(vm->scratch_pt)) {
> -             cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +             cleanup_scratch_page(vm);
>               return PTR_ERR(vm->scratch_pt);
>       }
>  
> @@ -2055,17 +2058,14 @@ static int gen6_init_scratch(struct 
> i915_address_space *vm)
>  
>  static void gen6_free_scratch(struct i915_address_space *vm)
>  {
> -     struct drm_i915_private *dev_priv = vm->i915;
> -
> -     free_pt(dev_priv, vm->scratch_pt);
> -     cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +     free_pt(vm, vm->scratch_pt);
> +     cleanup_scratch_page(vm);
>  }
>  
>  static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>  {
>       struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>       struct i915_page_directory *pd = &ppgtt->pd;
> -     struct drm_i915_private *dev_priv = vm->i915;
>       struct i915_page_table *pt;
>       uint32_t pde;
>  
> @@ -2073,7 +2073,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>  
>       gen6_for_all_pdes(pt, pd, pde)
>               if (pt != vm->scratch_pt)
> -                     free_pt(dev_priv, pt);
> +                     free_pt(vm, pt);
>  
>       gen6_free_scratch(vm);
>  }
> @@ -2182,6 +2182,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
>                          struct drm_i915_private *dev_priv)
>  {
>       ppgtt->base.i915 = dev_priv;
> +     ppgtt->base.dma = &dev_priv->drm.pdev->dev;
>  
>       if (INTEL_INFO(dev_priv)->gen < 8)
>               return gen6_ppgtt_init(ppgtt);
> @@ -2199,10 +2200,14 @@ static void i915_address_space_init(struct i915_address_space *vm,
>       INIT_LIST_HEAD(&vm->inactive_list);
>       INIT_LIST_HEAD(&vm->unbound_list);
>       list_add_tail(&vm->global_link, &dev_priv->vm_list);
> +     pagevec_init(&vm->free_pages, false);
>  }
>  
>  static void i915_address_space_fini(struct i915_address_space *vm)
>  {
> +     if (pagevec_count(&vm->free_pages))
> +             vm_free_pages_release(vm);
> +
>       i915_gem_timeline_fini(&vm->timeline);
>       drm_mm_takedown(&vm->mm);
>       list_del(&vm->global_link);
> @@ -2310,9 +2315,8 @@ void i915_ppgtt_release(struct kref *kref)
>       WARN_ON(!list_empty(&ppgtt->base.inactive_list));
>       WARN_ON(!list_empty(&ppgtt->base.unbound_list));
>  
> -     i915_address_space_fini(&ppgtt->base);
> -
>       ppgtt->base.cleanup(&ppgtt->base);
> +     i915_address_space_fini(&ppgtt->base);
>       kfree(ppgtt);
>  }
>  
> @@ -2947,7 +2951,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
>               return -ENOMEM;
>       }
>  
> -     ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32);
> +     ret = setup_scratch_page(&ggtt->base, GFP_DMA32);
>       if (ret) {
>               DRM_ERROR("Scratch setup failed\n");
>               /* iounmap will also get called at remove, but meh */
> @@ -3036,7 +3040,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
>       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  
>       iounmap(ggtt->gsm);
> -     cleanup_scratch_page(vm->i915, &vm->scratch_page);
> +     cleanup_scratch_page(vm);
>  }
>  
>  static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> @@ -3187,6 +3191,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
>       int ret;
>  
>       ggtt->base.i915 = dev_priv;
> +     ggtt->base.dma = &dev_priv->drm.pdev->dev;
>  
>       if (INTEL_GEN(dev_priv) <= 5)
>               ret = i915_gmch_probe(ggtt);
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 3c5ef5358cef..c59a7687ed6f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -36,6 +36,7 @@
>  
>  #include <linux/io-mapping.h>
>  #include <linux/mm.h>
> +#include <linux/pagevec.h>
>  
>  #include "i915_gem_timeline.h"
>  #include "i915_gem_request.h"
> @@ -247,6 +248,7 @@ struct i915_address_space {
>       struct drm_mm mm;
>       struct i915_gem_timeline timeline;
>       struct drm_i915_private *i915;
> +     struct device *dma;
>       /* Every address space belongs to a struct file - except for the global
>        * GTT that is owned by the driver (and so @file is set to NULL). In
>        * principle, no information should leak from one context to another
> @@ -297,6 +299,9 @@ struct i915_address_space {
>        */
>       struct list_head unbound_list;
>  
> +     struct pagevec free_pages;
> +     bool pt_kmap_wc;
> +
>       /* FIXME: Need a more generic return type */
>       gen6_pte_t (*pte_encode)(dma_addr_t addr,
>                                enum i915_cache_level level,
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to