On Tue, Apr 20, 2021 at 12:34:50PM +0200, Maarten Lankhorst wrote:
> Commit e3793468b466 ("drm/i915: Use the async worker to avoid reclaim
> tainting the ggtt->mutex") moves the vma binding to dma_fence_work,
> but dma_fence_work has signalling fence semantics.
> 
> On braswell, we can call stop_machine inside fence_work, causing a splat
> because memory allocation inside stop_machine is not allowed.
> 
> This patch does not fix that lockdep splat yet, but will allow the next
> patch to remove it.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankho...@linux.intel.com>
> Acked-by: Daniel Vetter <daniel.vet...@ffwll.ch>

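For anyone trying to picture the splat: on braswell (the IS_CHERRYVIEW path
in the gen8_gmch_probe() hunk below) the GGTT PTE writers run under
stop_machine(), while dma_fence_work runs its callback with signalling fence
semantics. A minimal sketch of the inversion — the names here are made up,
standing in for the i915 internals, and lockdep models it roughly like this:

        #include <linux/dma-fence.h>
        #include <linux/stop_machine.h>

        /* hypothetical stand-in for the bxt/vtd __BKL pte-writer callback */
        static int write_ptes__cb(void *data)
        {
                /* every other CPU is quiesced here; just write the PTEs */
                return 0;
        }

        /* hypothetical stand-in for what the dma_fence_work callback does */
        static void bind_work(void)
        {
                bool cookie = dma_fence_begin_signalling();

                /*
                 * stop_machine() blocks waiting on the per-cpu stopper
                 * threads; blocking (and allocating) while fences are being
                 * signalled is exactly what the dma-fence lockdep
                 * annotations flag as a potential deadlock against reclaim.
                 */
                stop_machine(write_ptes__cb, NULL, NULL);

                dma_fence_end_signalling(cookie);
        }
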
Thomas pointed out that this removes pipelined PTE writing for both ggtt
and ppgtt, and that's a bit much. So I need to retract my ack here.
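
To spell out what gets lost: the removed i915_vma_work published the bind as
the vma's exclusive fence and chained it behind the previous bind, so later
GPU work could simply await the fence while the PTE writes happened in the
background, instead of every caller stalling under vm->mutex. The core of
that idiom, trimmed from the hunk this patch deletes from i915_vma_bind():

        struct dma_fence *prev;

        prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
        if (prev) {
                /* order this bind after the previous bind / pending access */
                __i915_sw_fence_await_dma_fence(&work->base.chain, prev,
                                                &work->cb);
                dma_fence_put(prev);
        }
        work->base.dma.error = 0; /* enable the queue_work() */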
-Daniel

> ---
>  drivers/gpu/drm/i915/gem/i915_gem_stolen.c |   3 -
>  drivers/gpu/drm/i915/gt/gen6_ppgtt.c       |   1 -
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c       |   1 -
>  drivers/gpu/drm/i915/gt/intel_ggtt.c       |   4 -
>  drivers/gpu/drm/i915/gt/intel_gtt.h        |   2 -
>  drivers/gpu/drm/i915/i915_gem.c            |   6 -
>  drivers/gpu/drm/i915/i915_vma.c            | 174 +++------------------
>  drivers/gpu/drm/i915/i915_vma.h            |   5 +-
>  drivers/gpu/drm/i915/i915_vma_types.h      |   3 -
>  9 files changed, 21 insertions(+), 178 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index b0597de206de..ec04df0a3b89 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -505,9 +505,6 @@ static void dbg_poison(struct i915_ggtt *ggtt,
>       if (!drm_mm_node_allocated(&ggtt->error_capture))
>               return;
>  
> -     if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND)
> -             return; /* beware stop_machine() inversion */
> -
>       GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
>  
>       mutex_lock(&ggtt->error_mutex);
> diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> index e08dff376339..0c5a9a767849 100644
> --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> @@ -436,7 +436,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
>       ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
>       ppgtt->base.vm.top = 1;
>  
> -     ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>       ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
>       ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
>       ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 176c19633412..92f8a23e66cc 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -736,7 +736,6 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
>                       goto err_free_pd;
>       }
>  
> -     ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>       ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>       ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
>       ppgtt->vm.clear_range = gen8_ppgtt_clear;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 670c1271e7d5..e1ec6edae1fb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -127,7 +127,6 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt)
>  
>       list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
>               GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> -             i915_vma_wait_for_bind(vma);
>  
>               if (i915_vma_is_pinned(vma))
>                       continue;
> @@ -671,7 +670,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
>       ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
>  
>       ggtt->alias = ppgtt;
> -     ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
>  
>       GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
>       ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
> @@ -911,8 +909,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>           IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
>               ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
>               ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
> -             ggtt->vm.bind_async_flags =
> -                     I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
>       }
>  
>       ggtt->invalidate = gen8_ggtt_invalidate;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index e67e34e17913..d9d2ca8b4b61 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -230,8 +230,6 @@ struct i915_address_space {
>       u64 total;              /* size addr space maps (ex. 2GB for ggtt) */
>       u64 reserved;           /* size addr space reserved */
>  
> -     unsigned int bind_async_flags;
> -
>       /*
>        * Each active user context has its own address space (in full-ppgtt).
>        * Since the vm may be shared between multiple contexts, we count how
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b23f58e94cfb..4639c47c038b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -946,12 +946,6 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
>               mutex_unlock(&ggtt->vm.mutex);
>       }
>  
> -     ret = i915_vma_wait_for_bind(vma);
> -     if (ret) {
> -             i915_vma_unpin(vma);
> -             return ERR_PTR(ret);
> -     }
> -
>       return vma;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 07490db51cdc..e4b2590d41ce 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -289,79 +289,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>       return vma;
>  }
>  
> -struct i915_vma_work {
> -     struct dma_fence_work base;
> -     struct i915_address_space *vm;
> -     struct i915_vm_pt_stash stash;
> -     struct i915_vma *vma;
> -     struct drm_i915_gem_object *pinned;
> -     struct i915_sw_dma_fence_cb cb;
> -     enum i915_cache_level cache_level;
> -     unsigned int flags;
> -};
> -
> -static int __vma_bind(struct dma_fence_work *work)
> -{
> -     struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
> -     struct i915_vma *vma = vw->vma;
> -
> -     vma->ops->bind_vma(vw->vm, &vw->stash,
> -                        vma, vw->cache_level, vw->flags);
> -     return 0;
> -}
> -
> -static void __vma_release(struct dma_fence_work *work)
> -{
> -     struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
> -
> -     if (vw->pinned) {
> -             __i915_gem_object_unpin_pages(vw->pinned);
> -             i915_gem_object_put(vw->pinned);
> -     }
> -
> -     i915_vm_free_pt_stash(vw->vm, &vw->stash);
> -     i915_vm_put(vw->vm);
> -}
> -
> -static const struct dma_fence_work_ops bind_ops = {
> -     .name = "bind",
> -     .work = __vma_bind,
> -     .release = __vma_release,
> -};
> -
> -struct i915_vma_work *i915_vma_work(void)
> -{
> -     struct i915_vma_work *vw;
> -
> -     vw = kzalloc(sizeof(*vw), GFP_KERNEL);
> -     if (!vw)
> -             return NULL;
> -
> -     dma_fence_work_init(&vw->base, &bind_ops);
> -     vw->base.dma.error = -EAGAIN; /* disable the worker by default */
> -
> -     return vw;
> -}
> -
> -int i915_vma_wait_for_bind(struct i915_vma *vma)
> -{
> -     int err = 0;
> -
> -     if (rcu_access_pointer(vma->active.excl.fence)) {
> -             struct dma_fence *fence;
> -
> -             rcu_read_lock();
> -             fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
> -             rcu_read_unlock();
> -             if (fence) {
> -                     err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
> -                     dma_fence_put(fence);
> -             }
> -     }
> -
> -     return err;
> -}
> -
>  /**
>   * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
>   * @vma: VMA to map
> @@ -375,8 +302,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
>   */
>  int i915_vma_bind(struct i915_vma *vma,
>                 enum i915_cache_level cache_level,
> -               u32 flags,
> -               struct i915_vma_work *work)
> +               u32 flags, struct i915_vm_pt_stash *stash)
>  {
>       u32 bind_flags;
>       u32 vma_flags;
> @@ -405,39 +331,7 @@ int i915_vma_bind(struct i915_vma *vma,
>       GEM_BUG_ON(!vma->pages);
>  
>       trace_i915_vma_bind(vma, bind_flags);
> -     if (work && bind_flags & vma->vm->bind_async_flags) {
> -             struct dma_fence *prev;
> -
> -             work->vma = vma;
> -             work->cache_level = cache_level;
> -             work->flags = bind_flags;
> -
> -             /*
> -              * Note we only want to chain up to the migration fence on
> -              * the pages (not the object itself). As we don't track that,
> -              * yet, we have to use the exclusive fence instead.
> -              *
> -              * Also note that we do not want to track the async vma as
> -              * part of the obj->resv->excl_fence as it only affects
> -              * execution and not content or object's backing store lifetime.
> -              */
> -             prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
> -             if (prev) {
> -                     __i915_sw_fence_await_dma_fence(&work->base.chain,
> -                                                     prev,
> -                                                     &work->cb);
> -                     dma_fence_put(prev);
> -             }
> -
> -             work->base.dma.error = 0; /* enable the queue_work() */
> -
> -             if (vma->obj) {
> -                     __i915_gem_object_pin_pages(vma->obj);
> -                     work->pinned = i915_gem_object_get(vma->obj);
> -             }
> -     } else {
> -             vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
> -     }
> +     vma->ops->bind_vma(vma->vm, stash, vma, cache_level, bind_flags);
>  
>       atomic_or(bind_flags, &vma->flags);
>       return 0;
> @@ -531,9 +425,6 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
>       if (!drm_mm_node_allocated(&vma->node))
>               return false;
>  
> -     if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
> -             return true;
> -
>       if (vma->node.size < size)
>               return true;
>  
> @@ -752,7 +643,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
>               if (unlikely(flags & ~bound))
>                       return false;
>  
> -             if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
> +             if (unlikely(bound & I915_VMA_OVERFLOW))
>                       return false;
>  
>               if (!(bound & I915_VMA_PIN_MASK))
> @@ -770,7 +661,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
>        */
>       mutex_lock(&vma->vm->mutex);
>       do {
> -             if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
> +             if (unlikely(bound & I915_VMA_OVERFLOW)) {
>                       pinned = false;
>                       break;
>               }
> @@ -857,10 +748,10 @@ static void vma_unbind_pages(struct i915_vma *vma)
>  int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>                   u64 size, u64 alignment, u64 flags)
>  {
> -     struct i915_vma_work *work = NULL;
>       intel_wakeref_t wakeref = 0;
>       unsigned int bound;
>       int err;
> +     struct i915_vm_pt_stash stash = {};
>  
>  #ifdef CONFIG_PROVE_LOCKING
>       if (debug_locks && !WARN_ON(!ww) && vma->resv)
> @@ -883,33 +774,21 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>       if (flags & PIN_GLOBAL)
>               wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
>  
> -     if (flags & vma->vm->bind_async_flags) {
> -             /* lock VM */
> -             err = i915_vm_lock_objects(vma->vm, ww);
> +     if (vma->vm->allocate_va_range) {
> +             err = i915_vm_alloc_pt_stash(vma->vm,
> +                                          &stash,
> +                                          vma->size);
>               if (err)
>                       goto err_rpm;
>  
> -             work = i915_vma_work();
> -             if (!work) {
> -                     err = -ENOMEM;
> +             err = i915_vm_lock_objects(vma->vm, ww);
> +             if (err)
>                       goto err_rpm;
> -             }
>  
> -             work->vm = i915_vm_get(vma->vm);
> -
> -             /* Allocate enough page directories to used PTE */
> -             if (vma->vm->allocate_va_range) {
> -                     err = i915_vm_alloc_pt_stash(vma->vm,
> -                                                  &work->stash,
> -                                                  vma->size);
> -                     if (err)
> -                             goto err_fence;
> -
> -                     err = i915_vm_pin_pt_stash(vma->vm,
> -                                                &work->stash);
> -                     if (err)
> -                             goto err_fence;
> -             }
> +             err = i915_vm_pin_pt_stash(vma->vm,
> +                                        &stash);
> +             if (err)
> +                     goto err_rpm;
>       }
>  
>       /*
> @@ -932,7 +811,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>       err = mutex_lock_interruptible_nested(&vma->vm->mutex,
>                                             !(flags & PIN_GLOBAL));
>       if (err)
> -             goto err_fence;
> +             goto err_rpm;
>  
>       /* No more allocations allowed now we hold vm->mutex */
>  
> @@ -942,11 +821,6 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>       }
>  
>       bound = atomic_read(&vma->flags);
> -     if (unlikely(bound & I915_VMA_ERROR)) {
> -             err = -ENOMEM;
> -             goto err_unlock;
> -     }
> -
>       if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
>               err = -EAGAIN; /* pins are meant to be fairly temporary */
>               goto err_unlock;
> @@ -973,7 +847,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>       GEM_BUG_ON(!vma->pages);
>       err = i915_vma_bind(vma,
>                           vma->obj ? vma->obj->cache_level : 0,
> -                         flags, work);
> +                         flags, &stash);
>       if (err)
>               goto err_remove;
>  
> @@ -996,10 +870,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>       i915_active_release(&vma->active);
>  err_unlock:
>       mutex_unlock(&vma->vm->mutex);
> -err_fence:
> -     if (work)
> -             dma_fence_work_commit_imm(&work->base);
>  err_rpm:
> +     i915_vm_free_pt_stash(vma->vm, &stash);
>       if (wakeref)
>               intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
>       vma_put_pages(vma);
> @@ -1034,14 +906,8 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>                       err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
>               else
>                       err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
> -             if (err != -ENOSPC) {
> -                     if (!err) {
> -                             err = i915_vma_wait_for_bind(vma);
> -                             if (err)
> -                                     i915_vma_unpin(vma);
> -                     }
> +             if (err != -ENOSPC)
>                       return err;
> -             }
>  
>               /* Unlike i915_vma_pin, we don't take no for an answer! */
>               flush_idle_contexts(vm->gt);
> @@ -1306,7 +1172,7 @@ void __i915_vma_evict(struct i915_vma *vma)
>               trace_i915_vma_unbind(vma);
>               vma->ops->unbind_vma(vma->vm, vma);
>       }
> -     atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
> +     atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_GGTT_WRITE),
>                  &vma->flags);
>  
>       i915_vma_detach(vma);
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 8df784a026d2..d1d0fc76ef99 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -195,11 +195,10 @@ i915_vma_compare(struct i915_vma *vma,
>       return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
>  }
>  
> -struct i915_vma_work *i915_vma_work(void);
>  int i915_vma_bind(struct i915_vma *vma,
>                 enum i915_cache_level cache_level,
>                 u32 flags,
> -               struct i915_vma_work *work);
> +               struct i915_vm_pt_stash *stash);
>  
>  bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
>  bool i915_vma_misplaced(const struct i915_vma *vma,
> @@ -418,8 +417,6 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
>  void i915_vma_make_shrinkable(struct i915_vma *vma);
>  void i915_vma_make_purgeable(struct i915_vma *vma);
>  
> -int i915_vma_wait_for_bind(struct i915_vma *vma);
> -
>  static inline int i915_vma_sync(struct i915_vma *vma)
>  {
>       /* Wait for the asynchronous bindings and pending GPU reads */
> diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
> index 6b1bfa230b82..82868a755a96 100644
> --- a/drivers/gpu/drm/i915/i915_vma_types.h
> +++ b/drivers/gpu/drm/i915/i915_vma_types.h
> @@ -240,9 +240,6 @@ struct i915_vma {
>  
>  #define I915_VMA_ALLOC_BIT   12
>  
> -#define I915_VMA_ERROR_BIT   13
> -#define I915_VMA_ERROR               ((int)BIT(I915_VMA_ERROR_BIT))
> -
>  #define I915_VMA_GGTT_BIT    14
>  #define I915_VMA_CAN_FENCE_BIT       15
>  #define I915_VMA_USERFAULT_BIT       16
> -- 
> 2.31.0
> 
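For completeness, the resulting pin path is fully synchronous: the
page-table stash is allocated and pinned before vm->mutex is taken, and the
PTEs are then written inline. Condensed from the i915_vma_pin_ww() hunks
above (error handling elided):

        struct i915_vm_pt_stash stash = {};

        if (vma->vm->allocate_va_range) { /* ppgtt needs page directories */
                i915_vm_alloc_pt_stash(vma->vm, &stash, vma->size);
                i915_vm_lock_objects(vma->vm, ww);
                i915_vm_pin_pt_stash(vma->vm, &stash);
        }

        mutex_lock(&vma->vm->mutex);
        /* no more allocations allowed now we hold vm->mutex */
        i915_vma_bind(vma, vma->obj ? vma->obj->cache_level : 0,
                      flags, &stash); /* writes the PTEs synchronously */
        mutex_unlock(&vma->vm->mutex);

        i915_vm_free_pt_stash(vma->vm, &stash); /* drop whatever went unused */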

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch