On 2021-11-08 at 18:45:46 +0100, Thomas Hellström wrote:
> With asynchronous migrations, the vma state may be several migrations
> ahead of the state that matches the request we're capturing.
> Address that by introducing an i915_vma_snapshot structure that
> can be used to snapshot relevant state at request submission.
> In order to make sure we access the correct memory, the snapshots take
> references on relevant sg-tables and memory regions.
> 
> Also move the capture list allocation out of the fence signaling
> critical path and use the CONFIG_DRM_I915_CAPTURE_ERROR define to
> avoid compiling in members and functions used for error capture
> when they're not used.
> 
> Finally, introduce lockdep annotation.
> 
> v4:
> - Break out the capture allocation mode change to a separate patch.
> v5:
> - Fix compilation error in the !CONFIG_DRM_I915_CAPTURE_ERROR case
>   (kernel test robot)
> v6:
> - Use #if IS_ENABLED() instead of #ifdef to match driver style.
> - Move yet another change of allocation mode to the separate patch.
> - Commit message rework due to patch reordering.
> 
> Signed-off-by: Thomas Hellström <thomas.hellst...@linux.intel.com>

Looks good to me.

Reviewed-by: Ramalingam C <ramalinga...@intel.com>

> ---
>  drivers/gpu/drm/i915/Makefile                 |   1 +
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 135 +++++++++++---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   8 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c         | 176 +++++++++++++-----
>  drivers/gpu/drm/i915/i915_request.c           |  63 +++++--
>  drivers/gpu/drm/i915/i915_request.h           |  20 +-
>  drivers/gpu/drm/i915/i915_vma_snapshot.c      | 137 ++++++++++++++
>  drivers/gpu/drm/i915/i915_vma_snapshot.h      | 112 +++++++++++
>  8 files changed, 557 insertions(+), 95 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_vma_snapshot.c
>  create mode 100644 drivers/gpu/drm/i915/i915_vma_snapshot.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 7d0d0b814670..b4372e0a802f 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -174,6 +174,7 @@ i915-y += \
>         i915_trace_points.o \
>         i915_ttm_buddy_manager.o \
>         i915_vma.o \
> +       i915_vma_snapshot.o \
>         intel_wopcm.o
>  
>  # general-purpose microcontroller (GuC) support
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index ea5b7b2a4d70..ddca899f2396 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -29,6 +29,7 @@
>  #include "i915_gem_ioctls.h"
>  #include "i915_trace.h"
>  #include "i915_user_extensions.h"
> +#include "i915_vma_snapshot.h"
>  
>  struct eb_vma {
>       struct i915_vma *vma;
> @@ -307,11 +308,15 @@ struct i915_execbuffer {
>  
>       struct eb_fence *fences;
>       unsigned long num_fences;
> +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
> +     struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1];
> +#endif
>  };
>  
>  static int eb_parse(struct i915_execbuffer *eb);
>  static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
>  static void eb_unpin_engine(struct i915_execbuffer *eb);
> +static void eb_capture_release(struct i915_execbuffer *eb);
>  
>  static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
>  {
> @@ -1054,6 +1059,7 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
>                       i915_vma_put(vma);
>       }
>  
> +     eb_capture_release(eb);
>       eb_unpin_engine(eb);
>  }
>  
> @@ -1891,36 +1897,113 @@ eb_find_first_request_added(struct i915_execbuffer *eb)
>       return NULL;
>  }
>  
> -static int eb_move_to_gpu(struct i915_execbuffer *eb)
> +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
> +
> +/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
> +static void eb_capture_stage(struct i915_execbuffer *eb)
>  {
>       const unsigned int count = eb->buffer_count;
> -     unsigned int i = count;
> -     int err = 0, j;
> +     unsigned int i = count, j;
> +     struct i915_vma_snapshot *vsnap;
>  
>       while (i--) {
>               struct eb_vma *ev = &eb->vma[i];
>               struct i915_vma *vma = ev->vma;
>               unsigned int flags = ev->flags;
> -             struct drm_i915_gem_object *obj = vma->obj;
>  
> -             assert_vma_held(vma);
> +             if (!(flags & EXEC_OBJECT_CAPTURE))
> +                     continue;
>  
> -             if (flags & EXEC_OBJECT_CAPTURE) {
> +             vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
> +             if (!vsnap)
> +                     continue;
> +
> +             i915_vma_snapshot_init(vsnap, vma, "user");
> +             for_each_batch_create_order(eb, j) {
>                       struct i915_capture_list *capture;
>  
> -                     for_each_batch_create_order(eb, j) {
> -                             if (!eb->requests[j])
> -                                     break;
> +                     capture = kmalloc(sizeof(*capture), GFP_KERNEL);
> +                     if (!capture)
> +                             continue;
>  
> -                             capture = kmalloc(sizeof(*capture), GFP_KERNEL);
> -                             if (capture) {
> -                                     capture->next =
> -                                             eb->requests[j]->capture_list;
> -                                     capture->vma = vma;
> -                                     eb->requests[j]->capture_list = capture;
> -                             }
> -                     }
> +                     capture->next = eb->capture_lists[j];
> +                     capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
> +                     eb->capture_lists[j] = capture;
> +             }
> +             i915_vma_snapshot_put(vsnap);
> +     }
> +}
> +
> +/* Commit once we're in the critical path */
> +static void eb_capture_commit(struct i915_execbuffer *eb)
> +{
> +     unsigned int j;
> +
> +     for_each_batch_create_order(eb, j) {
> +             struct i915_request *rq = eb->requests[j];
> +
> +             if (!rq)
> +                     break;
> +
> +             rq->capture_list = eb->capture_lists[j];
> +             eb->capture_lists[j] = NULL;
> +     }
> +}
> +
> +/*
> + * Release anything that didn't get committed due to errors.
> + * The capture_list will otherwise be freed at request retire.
> + */
> +static void eb_capture_release(struct i915_execbuffer *eb)
> +{
> +     unsigned int j;
> +
> +     for_each_batch_create_order(eb, j) {
> +             if (eb->capture_lists[j]) {
> +                     i915_request_free_capture_list(eb->capture_lists[j]);
> +                     eb->capture_lists[j] = NULL;
>               }
> +     }
> +}
> +
> +static void eb_capture_list_clear(struct i915_execbuffer *eb)
> +{
> +     memset(eb->capture_lists, 0, sizeof(eb->capture_lists));
> +}
> +
> +#else
> +
> +static void eb_capture_stage(struct i915_execbuffer *eb)
> +{
> +}
> +
> +static void eb_capture_commit(struct i915_execbuffer *eb)
> +{
> +}
> +
> +static void eb_capture_release(struct i915_execbuffer *eb)
> +{
> +}
> +
> +static void eb_capture_list_clear(struct i915_execbuffer *eb)
> +{
> +}
> +
> +#endif
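
Just to confirm my understanding of the staging scheme: the intended call
order in i915_gem_do_execbuffer() is roughly the following (a sketch pieced
together from the call sites in this patch, not compilable on its own):

	eb_capture_list_clear(&eb);	/* zero the staging array up front */
	...
	ww_acquire_done(&eb.ww.ctx);
	eb_capture_stage(&eb);		/* GFP_KERNEL allocs and snapshots,
					 * before the signaling critical path */
	...
	/* eb_move_to_gpu() calls eb_capture_commit() inside the critical
	 * path: no allocations there, just handing the staged lists over
	 * to the requests, which free them at retire. */
	/* eb_release_vmas() calls eb_capture_release(), which frees
	 * anything that was never committed due to an error. */
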
> +
> +static int eb_move_to_gpu(struct i915_execbuffer *eb)
> +{
> +     const unsigned int count = eb->buffer_count;
> +     unsigned int i = count;
> +     int err = 0, j;
> +
> +     while (i--) {
> +             struct eb_vma *ev = &eb->vma[i];
> +             struct i915_vma *vma = ev->vma;
> +             unsigned int flags = ev->flags;
> +             struct drm_i915_gem_object *obj = vma->obj;
> +
> +             assert_vma_held(vma);
>  
>               /*
>                * If the GPU is not _reading_ through the CPU cache, we need
> @@ -2001,6 +2084,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>  
>       /* Unconditionally flush any chipset caches (for streaming writes). */
>       intel_gt_chipset_flush(eb->gt);
> +     eb_capture_commit(eb);
> +
>       return 0;
>  
>  err_skip:
> @@ -3143,13 +3228,14 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
>               }
>  
>               /*
> -              * Whilst this request exists, batch_obj will be on the
> -              * active_list, and so will hold the active reference. Only when
> -              * this request is retired will the batch_obj be moved onto
> -              * the inactive_list and lose its active reference. Hence we do
> -              * not need to explicitly hold another reference here.
> +              * Not really on stack, but we don't want to call
> +              * kfree on the batch_snapshot when we put it, so use the
> +              * _onstack interface.
>                */
> -             eb->requests[i]->batch = eb->batches[i]->vma;
> +             if (eb->batches[i]->vma)
> +                     i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
> +                                                    eb->batches[i]->vma,
> +                                                    "batch");
>               if (eb->batch_pool) {
>                       GEM_BUG_ON(intel_context_is_parallel(eb->context));
>                       intel_gt_buffer_pool_mark_active(eb->batch_pool,
> @@ -3198,6 +3284,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>       eb.fences = NULL;
>       eb.num_fences = 0;
>  
> +     eb_capture_list_clear(&eb);
> +
>       memset(eb.requests, 0, sizeof(struct i915_request *) *
>              ARRAY_SIZE(eb.requests));
>       eb.composite_fence = NULL;
> @@ -3284,6 +3372,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>       }
>  
>       ww_acquire_done(&eb.ww.ctx);
> +     eb_capture_stage(&eb);
>  
>       out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
>       if (IS_ERR(out_fence)) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 332756036007..f2ccd5b53d42 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1676,14 +1676,18 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
>  
>  static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
>  {
> +     struct i915_vma_snapshot *vsnap = &rq->batch_snapshot;
>       void *ring;
>       int size;
>  
> +     if (!i915_vma_snapshot_present(vsnap))
> +             vsnap = NULL;
> +
>       drm_printf(m,
>                  "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
>                  rq->head, rq->postfix, rq->tail,
> -                rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
> -                rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
> +                vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u,
> +                vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u);
>  
>       size = rq->tail - rq->head;
>       if (rq->tail < rq->head)
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b1e4ce0f798f..c61f9aaa40f9 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -48,6 +48,7 @@
>  #include "i915_gpu_error.h"
>  #include "i915_memcpy.h"
>  #include "i915_scatterlist.h"
> +#include "i915_vma_snapshot.h"
>  
>  #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
>  #define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
> @@ -1012,8 +1013,7 @@ void __i915_gpu_coredump_free(struct kref *error_ref)
>  
>  static struct i915_vma_coredump *
>  i915_vma_coredump_create(const struct intel_gt *gt,
> -                      const struct i915_vma *vma,
> -                      const char *name,
> +                      const struct i915_vma_snapshot *vsnap,
>                        struct i915_vma_compress *compress)
>  {
>       struct i915_ggtt *ggtt = gt->ggtt;
> @@ -1024,7 +1024,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
>  
>       might_sleep();
>  
> -     if (!vma || !vma->pages || !compress)
> +     if (!vsnap || !vsnap->pages || !compress)
>               return NULL;
>  
>       dst = kmalloc(sizeof(*dst), ALLOW_FAIL);
> @@ -1037,12 +1037,12 @@ i915_vma_coredump_create(const struct intel_gt *gt,
>       }
>  
>       INIT_LIST_HEAD(&dst->page_list);
> -     strcpy(dst->name, name);
> +     strcpy(dst->name, vsnap->name);
>       dst->next = NULL;
>  
> -     dst->gtt_offset = vma->node.start;
> -     dst->gtt_size = vma->node.size;
> -     dst->gtt_page_sizes = vma->page_sizes.gtt;
> +     dst->gtt_offset = vsnap->gtt_offset;
> +     dst->gtt_size = vsnap->gtt_size;
> +     dst->gtt_page_sizes = vsnap->page_sizes;
>       dst->unused = 0;
>  
>       ret = -EINVAL;
> @@ -1050,7 +1050,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
>               void __iomem *s;
>               dma_addr_t dma;
>  
> -             for_each_sgt_daddr(dma, iter, vma->pages) {
> +             for_each_sgt_daddr(dma, iter, vsnap->pages) {
>                       mutex_lock(&ggtt->error_mutex);
>                       ggtt->vm.insert_page(&ggtt->vm, dma, slot,
>                                            I915_CACHE_NONE, 0);
> @@ -1068,11 +1068,11 @@ i915_vma_coredump_create(const struct intel_gt *gt,
>                       if (ret)
>                               break;
>               }
> -     } else if (__i915_gem_object_is_lmem(vma->obj)) {
> -             struct intel_memory_region *mem = vma->obj->mm.region;
> +     } else if (vsnap->mr && vsnap->mr->type != INTEL_MEMORY_SYSTEM) {
> +             struct intel_memory_region *mem = vsnap->mr;
>               dma_addr_t dma;
>  
> -             for_each_sgt_daddr(dma, iter, vma->pages) {
> +             for_each_sgt_daddr(dma, iter, vsnap->pages) {
>                       void __iomem *s;
>  
>                       s = io_mapping_map_wc(&mem->iomap,
> @@ -1088,7 +1088,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
>       } else {
>               struct page *page;
>  
> -             for_each_sgt_page(page, iter, vma->pages) {
> +             for_each_sgt_page(page, iter, vsnap->pages) {
>                       void *s;
>  
>                       drm_clflush_pages(&page, 1);
> @@ -1324,37 +1324,71 @@ static bool record_context(struct i915_gem_context_coredump *e,
>  
>  struct intel_engine_capture_vma {
>       struct intel_engine_capture_vma *next;
> -     struct i915_vma *vma;
> +     struct i915_vma_snapshot *vsnap;
>       char name[16];
> +     bool lockdep_cookie;
>  };
>  
>  static struct intel_engine_capture_vma *
> -capture_vma(struct intel_engine_capture_vma *next,
> -         struct i915_vma *vma,
> -         const char *name,
> -         gfp_t gfp)
> +capture_vma_snapshot(struct intel_engine_capture_vma *next,
> +                  struct i915_vma_snapshot *vsnap,
> +                  gfp_t gfp)
>  {
>       struct intel_engine_capture_vma *c;
>  
> -     if (!vma)
> +     if (!i915_vma_snapshot_present(vsnap))
>               return next;
>  
>       c = kmalloc(sizeof(*c), gfp);
>       if (!c)
>               return next;
>  
> -     if (!i915_active_acquire_if_busy(&vma->active)) {
> +     if (!i915_vma_snapshot_resource_pin(vsnap, &c->lockdep_cookie)) {
>               kfree(c);
>               return next;
>       }
>  
> -     strcpy(c->name, name);
> -     c->vma = vma; /* reference held while active */
> +     strcpy(c->name, vsnap->name);
> +     c->vsnap = vsnap;
> +     i915_vma_snapshot_get(vsnap);
>  
>       c->next = next;
>       return c;
>  }
>  
> +static struct intel_engine_capture_vma *
> +capture_vma(struct intel_engine_capture_vma *next,
> +         struct i915_vma *vma,
> +         const char *name,
> +         gfp_t gfp)
> +{
> +     struct i915_vma_snapshot *vsnap;
> +
> +     if (!vma)
> +             return next;
> +
> +     /*
> +      * If the vma isn't pinned, then the vma should be snapshotted
> +      * to a struct i915_vma_snapshot at command submission time.
> +      * Not here.
> +      */
> +     GEM_WARN_ON(!i915_vma_is_pinned(vma));
> +     if (!i915_vma_is_pinned(vma))
> +             return next;
> +
> +     vsnap = i915_vma_snapshot_alloc(gfp);
> +     if (!vsnap)
> +             return next;
> +
> +     i915_vma_snapshot_init(vsnap, vma, name);
> +     next = capture_vma_snapshot(next, vsnap, gfp);
> +
> +     /* FIXME: Replace on async unbind. */
> +     i915_vma_snapshot_put(vsnap);
> +
> +     return next;
> +}
> +
>  static struct intel_engine_capture_vma *
>  capture_user(struct intel_engine_capture_vma *capture,
>            const struct i915_request *rq,
> @@ -1363,7 +1397,7 @@ capture_user(struct intel_engine_capture_vma *capture,
>       struct i915_capture_list *c;
>  
>       for (c = rq->capture_list; c; c = c->next)
> -             capture = capture_vma(capture, c->vma, "user", gfp);
> +             capture = capture_vma_snapshot(capture, c->vma_snapshot, gfp);
>  
>       return capture;
>  }
> @@ -1377,6 +1411,36 @@ static void add_vma(struct intel_engine_coredump *ee,
>       }
>  }
>  
> +static struct i915_vma_coredump *
> +create_vma_coredump(const struct intel_gt *gt, struct i915_vma *vma,
> +                 const char *name, struct i915_vma_compress *compress)
> +{
> +     struct i915_vma_coredump *ret = NULL;
> +     struct i915_vma_snapshot tmp;
> +     bool lockdep_cookie;
> +
> +     if (!vma)
> +             return NULL;
> +
> +     i915_vma_snapshot_init_onstack(&tmp, vma, name);
> +     if (i915_vma_snapshot_resource_pin(&tmp, &lockdep_cookie)) {
> +             ret = i915_vma_coredump_create(gt, &tmp, compress);
> +             i915_vma_snapshot_resource_unpin(&tmp, lockdep_cookie);
> +     }
> +     i915_vma_snapshot_put_onstack(&tmp);
> +
> +     return ret;
> +}
> +
> +static void add_vma_coredump(struct intel_engine_coredump *ee,
> +                          const struct intel_gt *gt,
> +                          struct i915_vma *vma,
> +                          const char *name,
> +                          struct i915_vma_compress *compress)
> +{
> +     add_vma(ee, create_vma_coredump(gt, vma, name, compress));
> +}
> +
>  struct intel_engine_coredump *
>  intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
>  {
> @@ -1410,7 +1474,7 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
>        * as the simplest method to avoid being overwritten
>        * by userspace.
>        */
> -     vma = capture_vma(vma, rq->batch, "batch", gfp);
> +     vma = capture_vma_snapshot(vma, &rq->batch_snapshot, gfp);
>       vma = capture_user(vma, rq, gfp);
>       vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
>       vma = capture_vma(vma, rq->context->state, "HW context", gfp);
> @@ -1431,30 +1495,24 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
>  
>       while (capture) {
>               struct intel_engine_capture_vma *this = capture;
> -             struct i915_vma *vma = this->vma;
> +             struct i915_vma_snapshot *vsnap = this->vsnap;
>  
>               add_vma(ee,
>                       i915_vma_coredump_create(engine->gt,
> -                                              vma, this->name,
> -                                              compress));
> +                                              vsnap, compress));
>  
> -             i915_active_release(&vma->active);
> +             i915_vma_snapshot_resource_unpin(vsnap, this->lockdep_cookie);
> +             i915_vma_snapshot_put(vsnap);
>  
>               capture = this->next;
>               kfree(this);
>       }
>  
> -     add_vma(ee,
> -             i915_vma_coredump_create(engine->gt,
> -                                      engine->status_page.vma,
> -                                      "HW Status",
> -                                      compress));
> +     add_vma_coredump(ee, engine->gt, engine->status_page.vma,
> +                      "HW Status", compress);
>  
> -     add_vma(ee,
> -             i915_vma_coredump_create(engine->gt,
> -                                      engine->wa_ctx.vma,
> -                                      "WA context",
> -                                      compress));
> +     add_vma_coredump(ee, engine->gt, engine->wa_ctx.vma,
> +                      "WA context", compress);
>  }
>  
>  static struct intel_engine_coredump *
> @@ -1490,17 +1548,25 @@ capture_engine(struct intel_engine_cs *engine,
>               }
>       }
>       if (rq)
> -             capture = intel_engine_coredump_add_request(ee, rq,
> -                                                         ATOMIC_MAYFAIL);
> +             rq = i915_request_get_rcu(rq);
> +
> +     if (!rq)
> +             goto no_request_capture;
> +
> +     capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
>       if (!capture) {
> -no_request_capture:
> -             kfree(ee);
> -             return NULL;
> +             i915_request_put(rq);
> +             goto no_request_capture;
>       }
>  
>       intel_engine_coredump_add_vma(ee, capture, compress);
> +     i915_request_put(rq);
>  
>       return ee;
> +
> +no_request_capture:
> +     kfree(ee);
> +     return NULL;
>  }
>  
>  static void
> @@ -1554,10 +1620,8 @@ gt_record_uc(struct intel_gt_coredump *gt,
>        */
>       error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
>       error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
> -     error_uc->guc_log =
> -             i915_vma_coredump_create(gt->_gt,
> -                                      uc->guc.log.vma, "GuC log buffer",
> -                                      compress);
> +     error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
> +                                             "GuC log buffer", compress);
>  
>       return error_uc;
>  }
> @@ -1843,8 +1907,8 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt,
>       kfree(compress);
>  }
>  
> -struct i915_gpu_coredump *
> -i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
> +static struct i915_gpu_coredump *
> +__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
>  {
>       struct drm_i915_private *i915 = gt->i915;
>       struct i915_gpu_coredump *error;
> @@ -1885,6 +1949,22 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
>       return error;
>  }
>  
> +struct i915_gpu_coredump *
> +i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
> +{
> +     static DEFINE_MUTEX(capture_mutex);
> +     int ret = mutex_lock_interruptible(&capture_mutex);
> +     struct i915_gpu_coredump *dump;
> +
> +     if (ret)
> +             return ERR_PTR(ret);
> +
> +     dump = __i915_gpu_coredump(gt, engine_mask);
> +     mutex_unlock(&capture_mutex);
> +
> +     return dump;
> +}
> +
>  void i915_error_state_store(struct i915_gpu_coredump *error)
>  {
>       struct drm_i915_private *i915;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 623273aca09e..ee855dc86972 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -113,6 +113,10 @@ static void i915_fence_release(struct dma_fence *fence)
>       GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT &&
>                  rq->guc_prio != GUC_PRIO_FINI);
>  
> +     i915_request_free_capture_list(fetch_and_zero(&rq->capture_list));
> +     if (i915_vma_snapshot_present(&rq->batch_snapshot))
> +             i915_vma_snapshot_put_onstack(&rq->batch_snapshot);
> +
>       /*
>        * The request is put onto a RCU freelist (i.e. the address
>        * is immediately reused), mark the fences as being freed now.
> @@ -186,19 +190,6 @@ void i915_request_notify_execute_cb_imm(struct i915_request *rq)
>       __notify_execute_cb(rq, irq_work_imm);
>  }
>  
> -static void free_capture_list(struct i915_request *request)
> -{
> -     struct i915_capture_list *capture;
> -
> -     capture = fetch_and_zero(&request->capture_list);
> -     while (capture) {
> -             struct i915_capture_list *next = capture->next;
> -
> -             kfree(capture);
> -             capture = next;
> -     }
> -}
> -
>  static void __i915_request_fill(struct i915_request *rq, u8 val)
>  {
>       void *vaddr = rq->ring->vaddr;
> @@ -303,6 +294,37 @@ static void __rq_cancel_watchdog(struct i915_request *rq)
>               i915_request_put(rq);
>  }
>  
> +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
> +
> +/**
> + * i915_request_free_capture_list - Free a capture list
> + * @capture: Pointer to the first list item or NULL
> + */
> +void i915_request_free_capture_list(struct i915_capture_list *capture)
> +{
> +     while (capture) {
> +             struct i915_capture_list *next = capture->next;
> +
> +             i915_vma_snapshot_put(capture->vma_snapshot);
> +             capture = next;
> +     }
> +}
> +
> +#define assert_capture_list_is_null(_rq) GEM_BUG_ON((_rq)->capture_list)
> +
> +#define clear_capture_list(_rq) ((_rq)->capture_list = NULL)
> +
> +#else
> +
> +#define i915_request_free_capture_list(_a) do {} while (0)
> +
> +#define assert_capture_list_is_null(_a) do {} while (0)
> +
> +#define clear_capture_list(_rq) do {} while (0)
> +
> +#endif
> +
>  bool i915_request_retire(struct i915_request *rq)
>  {
>       if (!__i915_request_is_complete(rq))
> @@ -359,7 +381,6 @@ bool i915_request_retire(struct i915_request *rq)
>       intel_context_exit(rq->context);
>       intel_context_unpin(rq->context);
>  
> -     free_capture_list(rq);
>       i915_sched_node_fini(&rq->sched);
>       i915_request_put(rq);
>  
> @@ -829,11 +850,18 @@ static void __i915_request_ctor(void *arg)
>       i915_sw_fence_init(&rq->submit, submit_notify);
>       i915_sw_fence_init(&rq->semaphore, semaphore_notify);
>  
> -     rq->capture_list = NULL;
> +     clear_capture_list(rq);
> +     rq->batch_snapshot.present = false;
>  
>       init_llist_head(&rq->execute_cb);
>  }
>  
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#define clear_batch_ptr(_rq) ((_rq)->batch = NULL)
> +#else
> +#define clear_batch_ptr(_a) do {} while (0)
> +#endif
> +
>  struct i915_request *
>  __i915_request_create(struct intel_context *ce, gfp_t gfp)
>  {
> @@ -925,10 +953,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>       i915_sched_node_reinit(&rq->sched);
>  
>       /* No zalloc, everything must be cleared after use */
> -     rq->batch = NULL;
> +     clear_batch_ptr(rq);
>       __rq_init_watchdog(rq);
> -     GEM_BUG_ON(rq->capture_list);
> +     assert_capture_list_is_null(rq);
>       GEM_BUG_ON(!llist_empty(&rq->execute_cb));
> +     GEM_BUG_ON(i915_vma_snapshot_present(&rq->batch_snapshot));
>  
>       /*
>        * Reserve space in the ring buffer for all the commands required to
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index dc359242d1ae..16aba0cb32d6 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -40,6 +40,7 @@
>  #include "i915_scheduler.h"
>  #include "i915_selftest.h"
>  #include "i915_sw_fence.h"
> +#include "i915_vma_snapshot.h"
>  
>  #include <uapi/drm/i915_drm.h>
>  
> @@ -48,11 +49,17 @@ struct drm_i915_gem_object;
>  struct drm_printer;
>  struct i915_request;
>  
> +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
>  struct i915_capture_list {
> +     struct i915_vma_snapshot *vma_snapshot;
>       struct i915_capture_list *next;
> -     struct i915_vma *vma;
>  };
>  
> +void i915_request_free_capture_list(struct i915_capture_list *capture);
> +#else
> +#define i915_request_free_capture_list(_a) do {} while (0)
> +#endif
> +
>  #define RQ_TRACE(rq, fmt, ...) do {                                  \
>       const struct i915_request *rq__ = (rq);                         \
>       ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt,  \
> @@ -289,10 +296,12 @@ struct i915_request {
>       /** Preallocate space in the ring for the emitting the request */
>       u32 reserved_space;
>  
> -     /** Batch buffer related to this request if any (used for
> -      * error state dump only).
> -      */
> -     struct i915_vma *batch;
> +     /** Batch buffer pointer for selftest internal use. */
> +     I915_SELFTEST_DECLARE(struct i915_vma *batch);
> +
> +     struct i915_vma_snapshot batch_snapshot;
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
>       /**
>        * Additional buffers requested by userspace to be captured upon
>        * a GPU hang. The vma/obj on this list are protected by their
> @@ -300,6 +309,7 @@ struct i915_request {
>        * on the active_list (of their final request).
>        */
>       struct i915_capture_list *capture_list;
> +#endif
>  
>       /** Time at which this request was emitted, in jiffies. */
>       unsigned long emitted_jiffies;
> diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.c b/drivers/gpu/drm/i915/i915_vma_snapshot.c
> new file mode 100644
> index 000000000000..44985d600f96
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_vma_snapshot.c
> @@ -0,0 +1,137 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2021 Intel Corporation
> + */
> +
> +#include "i915_vma_snapshot.h"
> +#include "i915_vma_types.h"
> +#include "i915_vma.h"
> +
> +/**
> + * i915_vma_snapshot_init - Initialize a struct i915_vma_snapshot from
> + * a struct i915_vma.
> + * @vsnap: The i915_vma_snapshot to init.
> + * @vma: A struct i915_vma used to initialize @vsnap.
> + * @name: Name associated with the snapshot. The character pointer needs to
> + * stay alive over the lifetime of the snapshot.
> + */
> +void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap,
> +                         struct i915_vma *vma,
> +                         const char *name)
> +{
> +     if (!i915_vma_is_pinned(vma))
> +             assert_object_held(vma->obj);
> +
> +     vsnap->name = name;
> +     vsnap->size = vma->size;
> +     vsnap->obj_size = vma->obj->base.size;
> +     vsnap->gtt_offset = vma->node.start;
> +     vsnap->gtt_size = vma->node.size;
> +     vsnap->page_sizes = vma->page_sizes.gtt;
> +     vsnap->pages = vma->pages;
> +     vsnap->pages_rsgt = NULL;
> +     vsnap->mr = NULL;
> +     if (vma->obj->mm.rsgt)
> +             vsnap->pages_rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt);
> +     if (vma->obj->mm.region)
> +             vsnap->mr = intel_memory_region_get(vma->obj->mm.region);
> +     kref_init(&vsnap->kref);
> +     vsnap->vma_resource = &vma->active;
> +     vsnap->onstack = false;
> +     vsnap->present = true;
> +}
> +
> +/**
> + * i915_vma_snapshot_init_onstack - Initialize a struct i915_vma_snapshot from
> + * a struct i915_vma, but avoid kfreeing it on last put.
> + * @vsnap: The i915_vma_snapshot to init.
> + * @vma: A struct i915_vma used to initialize @vsnap.
> + * @name: Name associated with the snapshot. The character pointer needs to
> + * stay alive over the lifetime of the snapshot.
> + */
> +void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap,
> +                                 struct i915_vma *vma,
> +                                 const char *name)
> +{
> +     i915_vma_snapshot_init(vsnap, vma, name);
> +     vsnap->onstack = true;
> +}
> +
> +static void vma_snapshot_release(struct kref *ref)
> +{
> +     struct i915_vma_snapshot *vsnap =
> +             container_of(ref, typeof(*vsnap), kref);
> +
> +     vsnap->present = false;
> +     if (vsnap->mr)
> +             intel_memory_region_put(vsnap->mr);
> +     if (vsnap->pages_rsgt)
> +             i915_refct_sgt_put(vsnap->pages_rsgt);
> +     if (!vsnap->onstack)
> +             kfree(vsnap);
> +}
> +
> +/**
> + * i915_vma_snapshot_put - Put an i915_vma_snapshot pointer reference
> + * @vsnap: The pointer reference
> + */
> +void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap)
> +{
> +     kref_put(&vsnap->kref, vma_snapshot_release);
> +}
> +
> +/**
> + * i915_vma_snapshot_put_onstack - Put an onstack i915_vma_snapshot pointer
> + * reference and verify that the structure is released
> + * @vsnap: The pointer reference
> + *
> + * This function is intended to be paired with i915_vma_snapshot_init_onstack()
> + * and should be called before exiting the scope that declared or
> + * freeing the structure that embedded @vsnap to verify that all references
> + * have been released.
> + */
> +void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap)
> +{
> +     if (!kref_put(&vsnap->kref, vma_snapshot_release))
> +             GEM_BUG_ON(1);
> +}
> +
> +/**
> + * i915_vma_snapshot_resource_pin - Temporarily block the memory the
> + * vma snapshot is pointing to from being released.
> + * @vsnap: The vma snapshot.
> + * @lockdep_cookie: Pointer to bool needed for lockdep support. This needs
> + * to be passed to the paired i915_vma_snapshot_resource_unpin.
> + *
> + * This function will temporarily try to hold up a fence or similar structure
> + * and will therefore enter a fence signaling critical section.
> + *
> + * Return: true if we succeeded in blocking the memory from being released,
> + * false otherwise.
> + */
> +bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap,
> +                                 bool *lockdep_cookie)
> +{
> +     bool pinned = i915_active_acquire_if_busy(vsnap->vma_resource);
> +
> +     if (pinned)
> +             *lockdep_cookie = dma_fence_begin_signalling();
> +
> +     return pinned;
> +}
> +
> +/**
> + * i915_vma_snapshot_resource_unpin - Unblock vma snapshot memory from
> + * being released.
> + * @vsnap: The vma snapshot.
> + * @lockdep_cookie: Cookie returned from matching i915_vma_snapshot_resource_pin().
> + *
> + * Might leave a fence signalling critical section and signal a fence.
> + */
> +void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap,
> +                                   bool lockdep_cookie)
> +{
> +     dma_fence_end_signalling(lockdep_cookie);
> +
> +     return i915_active_release(vsnap->vma_resource);
> +}
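
For reference, the intended pairing of the pin/unpin helpers, as I read it
from create_vma_coredump() earlier in this patch (a minimal sketch; the
"example" name is just a placeholder and error handling is trimmed):

	struct i915_vma_snapshot tmp;
	bool lockdep_cookie;

	i915_vma_snapshot_init_onstack(&tmp, vma, "example");
	if (i915_vma_snapshot_resource_pin(&tmp, &lockdep_cookie)) {
		/* tmp.pages is safe to walk here; we're inside a fence
		 * signaling critical section, so no blocking allocations. */
		i915_vma_snapshot_resource_unpin(&tmp, lockdep_cookie);
	}
	i915_vma_snapshot_put_onstack(&tmp);
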
> diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.h b/drivers/gpu/drm/i915/i915_vma_snapshot.h
> new file mode 100644
> index 000000000000..940581df4622
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_vma_snapshot.h
> @@ -0,0 +1,112 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2021 Intel Corporation
> + */
> +#ifndef _I915_VMA_SNAPSHOT_H_
> +#define _I915_VMA_SNAPSHOT_H_
> +
> +#include <linux/kref.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +
> +struct i915_active;
> +struct i915_refct_sgt;
> +struct i915_vma;
> +struct intel_memory_region;
> +struct sg_table;
> +
> +/**
> + * DOC: Simple utilities for snapshotting GPU vma metadata, later used for
> + * error capture. We use a separate header for this to avoid issues due to
> + * recursive header includes.
> + */
> +
> +/**
> + * struct i915_vma_snapshot - Snapshot of vma metadata.
> + * @name: Name associated with the snapshot.
> + * @size: The vma size in bytes.
> + * @obj_size: The size of the underlying object in bytes.
> + * @gtt_offset: The gtt offset the vma is bound to.
> + * @gtt_size: The size in bytes allocated for the vma in the GTT.
> + * @pages: The struct sg_table pointing to the pages bound.
> + * @pages_rsgt: The refcounted sg_table holding the reference for @pages if any.
> + * @mr: The memory region pointed for the pages bound.
> + * @kref: Reference for this structure.
> + * @vma_resource: FIXME: A means to keep the unbind fence from signaling.
> + * Temporarily while we have only sync unbinds, and still use the vma
> + * active, we use that. With async unbinding we need a signaling refcount
> + * for the unbind fence.
> + * @page_sizes: The vma GTT page sizes information.
> + * @onstack: Whether the structure shouldn't be freed on final put.
> + * @present: Whether the structure is present and initialized.
> + */
> +struct i915_vma_snapshot {
> +     const char *name;
> +     size_t size;
> +     size_t obj_size;
> +     size_t gtt_offset;
> +     size_t gtt_size;
> +     struct sg_table *pages;
> +     struct i915_refct_sgt *pages_rsgt;
> +     struct intel_memory_region *mr;
> +     struct kref kref;
> +     struct i915_active *vma_resource;
> +     u32 page_sizes;
> +     bool onstack:1;
> +     bool present:1;
> +};
> +
> +void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap,
> +                         struct i915_vma *vma,
> +                         const char *name);
> +
> +void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap,
> +                                 struct i915_vma *vma,
> +                                 const char *name);
> +
> +void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap);
> +
> +void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap);
> +
> +bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap,
> +                                 bool *lockdep_cookie);
> +
> +void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap,
> +                                   bool lockdep_cookie);
> +
> +/**
> + * i915_vma_snapshot_alloc - Allocate a struct i915_vma_snapshot
> + * @gfp: Allocation mode.
> + *
> + * Return: A pointer to a struct i915_vma_snapshot if successful.
> + * NULL otherwise.
> + */
> +static inline struct i915_vma_snapshot *i915_vma_snapshot_alloc(gfp_t gfp)
> +{
> +     return kmalloc(sizeof(struct i915_vma_snapshot), gfp);
> +}
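
And the heap-allocated variant follows the usual kref pattern, as in
eb_capture_stage() (sketch only; "capture" stands in for whatever structure
holds the long-lived reference):

	struct i915_vma_snapshot *vsnap;

	vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
	if (vsnap) {
		i915_vma_snapshot_init(vsnap, vma, "user");
		capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
		/* drop the initial reference; the capture list entry now
		 * holds the remaining one until request retire */
		i915_vma_snapshot_put(vsnap);
	}
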
> +
> +/**
> + * i915_vma_snapshot_get - Take a reference on a struct i915_vma_snapshot
> + * @vsnap: The snapshot whose reference count is incremented.
> + *
> + * Return: A pointer to a struct i915_vma_snapshot.
> + */
> +static inline struct i915_vma_snapshot *
> +i915_vma_snapshot_get(struct i915_vma_snapshot *vsnap)
> +{
> +     kref_get(&vsnap->kref);
> +     return vsnap;
> +}
> +
> +/**
> + * i915_vma_snapshot_present - Whether a struct i915_vma_snapshot is
> + * present and initialized.
> + * @vsnap: Pointer to the snapshot, or NULL.
> + *
> + * Return: true if present and initialized; false otherwise.
> + */
> +static inline bool
> +i915_vma_snapshot_present(const struct i915_vma_snapshot *vsnap)
> +{
> +     return vsnap && vsnap->present;
> +}
> +
> +#endif
> -- 
> 2.31.1
> 
