Re: [Intel-gfx] [PATCH v8 22/22] drm/i915/vm_bind: Support capture of persistent mappings

2022-12-08 Thread Niranjana Vishwanathapura

On Tue, Dec 06, 2022 at 05:40:54PM +0000, Matthew Auld wrote:

On 01/12/2022 18:43, Niranjana Vishwanathapura wrote:

On Thu, Dec 01, 2022 at 07:27:31AM -0800, Niranjana Vishwanathapura wrote:

On Thu, Dec 01, 2022 at 10:49:15AM +0000, Matthew Auld wrote:

On 29/11/2022 07:26, Niranjana Vishwanathapura wrote:

Support dump capture of persistent mappings upon user request.

Signed-off-by: Brian Welty 
Signed-off-by: Niranjana Vishwanathapura 


---
.../drm/i915/gem/i915_gem_vm_bind_object.c    | 11 +++++++++++
drivers/gpu/drm/i915/gt/intel_gtt.c           |  3 +++
drivers/gpu/drm/i915/gt/intel_gtt.h           |  5 +++++
drivers/gpu/drm/i915/i915_gpu_error.c         | 19 +++++++++++++++++++
drivers/gpu/drm/i915/i915_vma.c               |  1 +
drivers/gpu/drm/i915/i915_vma_types.h         |  2 ++
include/uapi/drm/i915_drm.h                   |  3 ++-
7 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
index 78e7c0642c5f..50969613daf6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
@@ -88,6 +88,11 @@ static void i915_gem_vm_bind_remove(struct i915_vma *vma, bool release_obj)
{
lockdep_assert_held(&vma->vm->vm_bind_lock);
+    spin_lock(&vma->vm->vm_capture_lock);
+    if (!list_empty(&vma->vm_capture_link))
+        list_del_init(&vma->vm_capture_link);
+    spin_unlock(&vma->vm->vm_capture_lock);
+
spin_lock(&vma->vm->vm_rebind_lock);
if (!list_empty(&vma->vm_rebind_link))
    list_del_init(&vma->vm_rebind_link);
@@ -357,6 +362,12 @@ static int i915_gem_vm_bind_obj(struct i915_address_space *vm,
    continue;
    }
+    if (va->flags & I915_GEM_VM_BIND_CAPTURE) {
+        spin_lock(&vm->vm_capture_lock);
+        list_add_tail(&vma->vm_capture_link, &vm->vm_capture_list);
+        spin_unlock(&vm->vm_capture_lock);
+    }
+
    list_add_tail(&vma->vm_bind_link, &vm->vm_bound_list);
    i915_vm_bind_it_insert(vma, &vm->va);
    if (!obj->priv_root)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index ebf6830574a0..bdabe13fc30e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -297,6 +297,9 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
    spin_lock_init(&vm->vm_rebind_lock);
    spin_lock_init(&vm->userptr_invalidated_lock);
    INIT_LIST_HEAD(&vm->userptr_invalidated_list);
+
+    INIT_LIST_HEAD(&vm->vm_capture_list);
+    spin_lock_init(&vm->vm_capture_lock);
}
void *__px_vaddr(struct drm_i915_gem_object *p)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 87e5b6568a00..8e4ddd073348 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -281,6 +281,11 @@ struct i915_address_space {
    /** @root_obj: root object for dma-resv sharing by private objects */
    struct drm_i915_gem_object *root_obj;
+    /* @vm_capture_list: list of vm captures */
+    struct list_head vm_capture_list;
+    /* @vm_capture_lock: protects vm_capture_list */
+    spinlock_t vm_capture_lock;
+
/* Global GTT */
bool is_ggtt:1;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9d5d5a397b64..3b2b12a739f7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1460,6 +1460,22 @@ capture_vma(struct intel_engine_capture_vma *next,
    return next;
}
+static struct intel_engine_capture_vma *
+capture_user_vm(struct intel_engine_capture_vma *capture,
+    struct i915_address_space *vm, gfp_t gfp)
+{
+    struct i915_vma *vma;
+
+    spin_lock(&vm->vm_capture_lock);


Does it make sense to move this into the eb3 submission stage, 
like we do for eb2? IIRC the gfp flags here are quite limiting 
due to potentially being in a fence critical section. If we can 
use rq->capture_list for eb3, we shouldn't need to change much 
here?




But that will add latency on the submission path, as we would have to
iterate over the capture_list on each submission. Besides, unlike the eb2
case, we can't just transfer the list to rq->capture_list, as we would
have to do that for every submission. This was discussed a long time back,
and it was decided not to burden the fast path (submission) with this
error capture, which is only required upon GPU hang (slow path).
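
For context: the per-VM capture list is only ever walked from the error-capture (slow) path, never at submission time. A minimal sketch of how capture_user_vm() might be wired into the existing capture_user() loop in i915_gpu_error.c — the exact call site below is my assumption, not part of the quoted hunk:

/* Sketch only: slow path, runs during engine coredump on GPU hang. */
static struct intel_engine_capture_vma *
capture_user(struct intel_engine_capture_vma *capture,
             const struct i915_request *rq, gfp_t gfp)
{
        struct i915_capture_list *c;

        /* eb2: per-request capture list, built at execbuf time */
        for (c = rq->capture_list; c; c = c->next)
                capture = capture_vma_snapshot(capture, c->vma_res,
                                               gfp, "user");

        /* eb3/vm_bind: walk the VM's capture list only on hang */
        return capture_user_vm(capture, rq->context->vm, gfp);
}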


Maybe add some of this to the commit message, just to give a bit more 
back story/history. From my pov I'm coming into this semi-blind.




Ok, will add.



Also there is the existing CONFIG_DRM_I915_CAPTURE_ERROR. Should 
we take that into account?




Ok, will fix.
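
For illustration, one possible way to honour CONFIG_DRM_I915_CAPTURE_ERROR would be to guard the bind-time bookkeeping — a sketch of one option, not the fix that was eventually posted:

/* Sketch: in i915_gem_vm_bind_obj(), before touching the capture list. */
if (va->flags & I915_GEM_VM_BIND_CAPTURE) {
        /* Assumed policy: reject (or silently ignore) the flag
         * when error capture is compiled out.
         */
        if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
                return -EINVAL;

        spin_lock(&vm->vm_capture_lock);
        list_add_tail(&vma->vm_capture_link, &vm->vm_capture_list);
        spin_unlock(&vm->vm_capture_lock);
}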

+    /* vma->resource must be valid here as persistent vmas are bound */
+    list_for_each_entry(vma, &vm->vm_capture_list, vm_capture_link)
+        capture = capture_vma_snapshot(capture, vma->resource,
+                                       gfp, "user");
+    spin_unlock(&vm->vm_capture_lock);
+
+    return capture;
+}

Re: [Intel-gfx] [PATCH v8 22/22] drm/i915/vm_bind: Support capture of persistent mappings

2022-12-06 Thread Matthew Auld

On 01/12/2022 18:43, Niranjana Vishwanathapura wrote:

On Thu, Dec 01, 2022 at 07:27:31AM -0800, Niranjana Vishwanathapura wrote:

On Thu, Dec 01, 2022 at 10:49:15AM +0000, Matthew Auld wrote:

On 29/11/2022 07:26, Niranjana Vishwanathapura wrote:

Support dump capture of persistent mappings upon user request.

Signed-off-by: Brian Welty 
Signed-off-by: Niranjana Vishwanathapura 


---
.../drm/i915/gem/i915_gem_vm_bind_object.c    | 11 +++++++++++
drivers/gpu/drm/i915/gt/intel_gtt.c           |  3 +++
drivers/gpu/drm/i915/gt/intel_gtt.h           |  5 +++++
drivers/gpu/drm/i915/i915_gpu_error.c         | 19 +++++++++++++++++++
drivers/gpu/drm/i915/i915_vma.c               |  1 +
drivers/gpu/drm/i915/i915_vma_types.h         |  2 ++
include/uapi/drm/i915_drm.h                   |  3 ++-
7 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
index 78e7c0642c5f..50969613daf6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
@@ -88,6 +88,11 @@ static void i915_gem_vm_bind_remove(struct i915_vma *vma, bool release_obj)
{
lockdep_assert_held(&vma->vm->vm_bind_lock);
+    spin_lock(&vma->vm->vm_capture_lock);
+    if (!list_empty(&vma->vm_capture_link))
+        list_del_init(&vma->vm_capture_link);
+    spin_unlock(&vma->vm->vm_capture_lock);
+
spin_lock(&vma->vm->vm_rebind_lock);
if (!list_empty(&vma->vm_rebind_link))
    list_del_init(&vma->vm_rebind_link);
@@ -357,6 +362,12 @@ static int i915_gem_vm_bind_obj(struct i915_address_space *vm,
    continue;
    }
+    if (va->flags & I915_GEM_VM_BIND_CAPTURE) {
+        spin_lock(&vm->vm_capture_lock);
+        list_add_tail(&vma->vm_capture_link, &vm->vm_capture_list);
+        spin_unlock(&vm->vm_capture_lock);
+    }
+
    list_add_tail(&vma->vm_bind_link, &vm->vm_bound_list);
    i915_vm_bind_it_insert(vma, &vm->va);
    if (!obj->priv_root)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index ebf6830574a0..bdabe13fc30e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -297,6 +297,9 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
    spin_lock_init(&vm->vm_rebind_lock);
    spin_lock_init(&vm->userptr_invalidated_lock);
    INIT_LIST_HEAD(&vm->userptr_invalidated_list);
+
+    INIT_LIST_HEAD(&vm->vm_capture_list);
+    spin_lock_init(&vm->vm_capture_lock);
}
void *__px_vaddr(struct drm_i915_gem_object *p)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 87e5b6568a00..8e4ddd073348 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -281,6 +281,11 @@ struct i915_address_space {
    /** @root_obj: root object for dma-resv sharing by private objects */
    struct drm_i915_gem_object *root_obj;
+    /* @vm_capture_list: list of vm captures */
+    struct list_head vm_capture_list;
+    /* @vm_capture_lock: protects vm_capture_list */
+    spinlock_t vm_capture_lock;
+
/* Global GTT */
bool is_ggtt:1;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9d5d5a397b64..3b2b12a739f7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1460,6 +1460,22 @@ capture_vma(struct intel_engine_capture_vma *next,
    return next;
}
+static struct intel_engine_capture_vma *
+capture_user_vm(struct intel_engine_capture_vma *capture,
+    struct i915_address_space *vm, gfp_t gfp)
+{
+    struct i915_vma *vma;
+
+    spin_lock(&vm->vm_capture_lock);


Does it make sense to move this into the eb3 submission stage, like 
we do for eb2? IIRC the gfp flags here are quite limiting due to 
potentially being in a fence critical section. If we can use 
rq->capture_list for eb3, we shouldn't need to change much here?




But that will add latency on the submission path, as we would have to
iterate over the capture_list on each submission. Besides, unlike the eb2
case, we can't just transfer the list to rq->capture_list, as we would
have to do that for every submission. This was discussed a long time back,
and it was decided not to burden the fast path (submission) with this
error capture, which is only required upon GPU hang (slow path).
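
To make the trade-off concrete: capture-list membership is established once, at bind time, so submissions pay nothing afterwards. A hypothetical userspace sketch against this series' proposed uapi (struct and ioctl names follow the vm_bind proposal and are not upstream):

/* Hypothetical usage of the proposed vm_bind uapi. */
struct drm_i915_gem_vm_bind bind = {
        .vm_id  = vm_id,        /* VM created in vm_bind mode */
        .handle = bo_handle,
        .start  = gpu_va,
        .offset = 0,
        .length = bo_size,
        .flags  = I915_GEM_VM_BIND_CAPTURE, /* dump this mapping on hang */
};

/* One ioctl at bind time; no per-submission capture-list handling. */
ioctl(drm_fd, DRM_IOCTL_I915_GEM_VM_BIND, &bind);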


Maybe add some of this to the commit message, just to give a bit more 
back story/history. From my pov I'm coming into this semi-blind.




Also there is the existing CONFIG_DRM_I915_CAPTURE_ERROR. Should we 
take that into account?




Ok, will fix.

+    /* vma->resource must be valid here as persistent vmas are bound */
+    list_for_each_entry(vma, &vm->vm_capture_list, vm_capture_link)
+        capture = capture_vma_snapshot(capture, vma->resource,
+                                       gfp, "user");
+    spin_unlock(&vm->vm_capture_lock);
+
+    return capture;
+}
+
static struct intel_engine_capture_vma *

Re: [Intel-gfx] [PATCH v8 22/22] drm/i915/vm_bind: Support capture of persistent mappings

2022-12-01 Thread Niranjana Vishwanathapura

On Thu, Dec 01, 2022 at 07:27:31AM -0800, Niranjana Vishwanathapura wrote:

On Thu, Dec 01, 2022 at 10:49:15AM +0000, Matthew Auld wrote:

On 29/11/2022 07:26, Niranjana Vishwanathapura wrote:

Support dump capture of persistent mappings upon user request.

Signed-off-by: Brian Welty 
Signed-off-by: Niranjana Vishwanathapura 
---
.../drm/i915/gem/i915_gem_vm_bind_object.c    | 11 +++++++++++
drivers/gpu/drm/i915/gt/intel_gtt.c           |  3 +++
drivers/gpu/drm/i915/gt/intel_gtt.h           |  5 +++++
drivers/gpu/drm/i915/i915_gpu_error.c         | 19 +++++++++++++++++++
drivers/gpu/drm/i915/i915_vma.c               |  1 +
drivers/gpu/drm/i915/i915_vma_types.h         |  2 ++
include/uapi/drm/i915_drm.h                   |  3 ++-
7 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
index 78e7c0642c5f..50969613daf6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
@@ -88,6 +88,11 @@ static void i915_gem_vm_bind_remove(struct i915_vma *vma, bool release_obj)
{
lockdep_assert_held(&vma->vm->vm_bind_lock);
+   spin_lock(&vma->vm->vm_capture_lock);
+   if (!list_empty(&vma->vm_capture_link))
+       list_del_init(&vma->vm_capture_link);
+   spin_unlock(&vma->vm->vm_capture_lock);
+
spin_lock(&vma->vm->vm_rebind_lock);
if (!list_empty(&vma->vm_rebind_link))
list_del_init(&vma->vm_rebind_link);
@@ -357,6 +362,12 @@ static int i915_gem_vm_bind_obj(struct i915_address_space *vm,
continue;
}
+   if (va->flags & I915_GEM_VM_BIND_CAPTURE) {
+       spin_lock(&vm->vm_capture_lock);
+       list_add_tail(&vma->vm_capture_link, &vm->vm_capture_list);
+       spin_unlock(&vm->vm_capture_lock);
+   }
+
list_add_tail(&vma->vm_bind_link, &vm->vm_bound_list);
i915_vm_bind_it_insert(vma, &vm->va);
if (!obj->priv_root)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index ebf6830574a0..bdabe13fc30e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -297,6 +297,9 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
spin_lock_init(&vm->vm_rebind_lock);
spin_lock_init(&vm->userptr_invalidated_lock);
INIT_LIST_HEAD(&vm->userptr_invalidated_list);
+
+   INIT_LIST_HEAD(&vm->vm_capture_list);
+   spin_lock_init(&vm->vm_capture_lock);
}
void *__px_vaddr(struct drm_i915_gem_object *p)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 87e5b6568a00..8e4ddd073348 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -281,6 +281,11 @@ struct i915_address_space {
/** @root_obj: root object for dma-resv sharing by private objects */
struct drm_i915_gem_object *root_obj;
+   /* @vm_capture_list: list of vm captures */
+   struct list_head vm_capture_list;
+   /* @vm_capture_lock: protects vm_capture_list */
+   spinlock_t vm_capture_lock;
+
/* Global GTT */
bool is_ggtt:1;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9d5d5a397b64..3b2b12a739f7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1460,6 +1460,22 @@ capture_vma(struct intel_engine_capture_vma *next,
return next;
}
+static struct intel_engine_capture_vma *
+capture_user_vm(struct intel_engine_capture_vma *capture,
+   struct i915_address_space *vm, gfp_t gfp)
+{
+   struct i915_vma *vma;
+
+   spin_lock(&vm->vm_capture_lock);


Does it make sense to move this into the eb3 submission stage, like 
we do for eb2? IIRC the gfp flags here are quite limiting due to 
potentially being in a fence critical section. If we can use 
rq->capture_list for eb3, we shouldn't need to change much here?




But that will add latency on the submission path, as we would have to
iterate over the capture_list on each submission. Besides, unlike the eb2
case, we can't just transfer the list to rq->capture_list, as we would
have to do that for every submission. This was discussed a long time back,
and it was decided not to burden the fast path (submission) with this
error capture, which is only required upon GPU hang (slow path).

Also there is the existing CONFIG_DRM_I915_CAPTURE_ERROR. Should we 
take that into account?




Ok, will fix.


+   /* vma->resource must be valid here as persistent vmas are bound */
+   list_for_each_entry(vma, &vm->vm_capture_list, vm_capture_link)
+       capture = capture_vma_snapshot(capture, vma->resource,
+                                      gfp, "user");
+   spin_unlock(&vm->vm_capture_lock);
+
+   return capture;
+}
+
static struct intel_engine_capture_vma *