Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-22 Thread Das, Nirmoy


On 6/22/2021 12:36 PM, Christian König wrote:

Am 22.06.21 um 12:30 schrieb Das, Nirmoy:


On 6/22/2021 10:36 AM, Christian König wrote:


Am 22.06.21 um 09:39 schrieb Das, Nirmoy:


On 6/22/2021 9:03 AM, Christian König wrote:



Am 22.06.21 um 08:57 schrieb Nirmoy Das:

Cleanup code related to vm pasid by adding helper functions.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 105 
-

  1 file changed, 50 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..6e476b173cbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,46 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };

+static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ unsigned int pasid,
+ unsigned int *vm_pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid 
+ 1,

+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+    if (vm_pasid)
+    *vm_pasid = pasid;
+


Ok the more I read from this patch the less it makes sense.

We don't allocate the passid here, we just set it up in the idr.

What we could do is to replace the idr with an xarray, that would 
certainly make more sense than this here.



xarray looks great, with that we don't need pasid_lock either.


You still need the lock to protect against VM destruction while 
looking things up, but you could switch to RCU for this instead.



xarray has xa_{lock|unloack}_irqsave() and adev->vm_manager.pasid_xa 
will exist till devices's lifetime.


That's just a wrapper around the lock.


So I am thinking something like:

amdgpu_vm_pasid_insert()

{

...

xa_lock_irqsave(adev->vm_manager.pasids, flags)
r = xa_store(>vm_manager.pasids, pasid, vm, GFP_ATOMIC);
xa_unlock_irqsave(adev->vm_manager.pasids, flags)


It would be really nice if we could avoid the GFP_ATOMIC here, but not 
much of a problem since we had that before.



I think it is possible as I think  only amdgpu_vm_handle_fault() runs in 
interrupt context which tries to find a vm ptr not store it.






}

amdgpu_vm_pasid_remove()

{



xa_lock_irqsave(adev->vm_manager.pasids, flags)
xa_erase(>vm_manager.pasids, pasid);
xa_unlock_irqsave(adev->vm_manager.pasids, flags)

}


xa_{lock|unloack}_irqsave() can be use while looking up vm ptr for a 
pasid.



Shouldn't this be enough ?



Yeah I think so.



Great.


Nirmoy



Christian.



Regards,

Nirmoy



Christian.




Thanks

Nirmoy




Christian.


+    return 0;
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   unsigned int pasid,
+   unsigned int *vm_pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    if (vm_pasid)
+    *vm_pasid = 0;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS
   * happens while holding this lock anywhere to prevent 
deadlocks when
@@ -2940,18 +2980,8 @@ int amdgpu_vm_init(struct amdgpu_device 
*adev, struct amdgpu_vm *vm, u32 pasid)


  amdgpu_bo_unreserve(vm->root.bo);

-    if (pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, 
pasid + 1,

-  GFP_ATOMIC);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-    if (r < 0)
-    goto error_free_root;
-
-    vm->pasid = pasid;
-    }
+    if (amdgpu_vm_pasid_alloc(adev, vm, pasid, >pasid))
+    goto error_free_root;

  INIT_KFIFO(vm->faults);

@@ -3038,19 +3068,11 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

  r = amdgpu_vm_check_clean_reserved(adev, vm);
  if (r)
  goto unreserve_bo;
+    r = amdgpu_vm_pasid_alloc(adev, vm, pasid, NULL);
+    if (r ==  -ENOSPC)
+    goto unreserve_bo;

-    if (pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, 
pasid + 1,

-  GFP_ATOMIC);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
-    if (r == -ENOSPC)
-    goto unreserve_bo;
-    r = 0;
-    }
+    r = 0;

  /* Check if PD needs to be reinitialized and do it before
   * changing any other state, in case it fails.
@@ -3089,35 +3111,23 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

  vm->is_compute_context = true;

  if (vm->pasid) {
-   

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-22 Thread Christian König




Am 22.06.21 um 08:57 schrieb Nirmoy Das:

Cleanup code related to vm pasid by adding helper functions.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 105 -
  1 file changed, 50 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 63975bda8e76..6e476b173cbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,46 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
  };

+static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+struct amdgpu_vm *vm,
+unsigned int pasid,
+unsigned int *vm_pasid)
+{
+   unsigned long flags;
+   int r;
+
+   if (!pasid)
+   return 0;
+
+   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+ GFP_ATOMIC);
+   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+   if (r < 0)
+   return r;
+   if (vm_pasid)
+   *vm_pasid = pasid;
+


Ok the more I read from this patch the less it makes sense.

We don't allocate the passid here, we just set it up in the idr.

What we could do is to replace the idr with an xarray, that would 
certainly make more sense than this here.


Christian.


+   return 0;
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+  unsigned int pasid,
+  unsigned int *vm_pasid)
+{
+   unsigned long flags;
+
+   if (!pasid)
+   return;
+
+   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+   idr_remove(>vm_manager.pasid_idr, pasid);
+   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+   if (vm_pasid)
+   *vm_pasid = 0;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
   * happens while holding this lock anywhere to prevent deadlocks when
@@ -2940,18 +2980,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm, u32 pasid)

amdgpu_bo_unreserve(vm->root.bo);

-   if (pasid) {
-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
- GFP_ATOMIC);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-   if (r < 0)
-   goto error_free_root;
-
-   vm->pasid = pasid;
-   }
+   if (amdgpu_vm_pasid_alloc(adev, vm, pasid, >pasid))
+   goto error_free_root;

INIT_KFIFO(vm->faults);

@@ -3038,19 +3068,11 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
r = amdgpu_vm_check_clean_reserved(adev, vm);
if (r)
goto unreserve_bo;
+   r = amdgpu_vm_pasid_alloc(adev, vm, pasid, NULL);
+   if (r ==  -ENOSPC)
+   goto unreserve_bo;

-   if (pasid) {
-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
- GFP_ATOMIC);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
-   if (r == -ENOSPC)
-   goto unreserve_bo;
-   r = 0;
-   }
+   r = 0;

/* Check if PD needs to be reinitialized and do it before
 * changing any other state, in case it fails.
@@ -3089,35 +3111,23 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
vm->is_compute_context = true;

if (vm->pasid) {
-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   idr_remove(>vm_manager.pasid_idr, vm->pasid);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
/* Free the original amdgpu allocated pasid
 * Will be replaced with kfd allocated pasid
 */
amdgpu_pasid_free(vm->pasid);
-   vm->pasid = 0;
+   amdgpu_vm_pasid_remove(adev, vm->pasid, >pasid);
}

/* Free the shadow bo for compute VM */
amdgpu_bo_unref(_amdgpu_bo_vm(vm->root.bo)->shadow);
-
if (pasid)
vm->pasid = pasid;

goto unreserve_bo;

  free_idr:
-   if (pasid) {
-   unsigned long flags;
+   amdgpu_vm_pasid_remove(adev, pasid, NULL);

-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   idr_remove(>vm_manager.pasid_idr, pasid);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-   }
  unreserve_bo:

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-22 Thread Christian König

Am 22.06.21 um 12:30 schrieb Das, Nirmoy:


On 6/22/2021 10:36 AM, Christian König wrote:


Am 22.06.21 um 09:39 schrieb Das, Nirmoy:


On 6/22/2021 9:03 AM, Christian König wrote:



Am 22.06.21 um 08:57 schrieb Nirmoy Das:

Cleanup code related to vm pasid by adding helper functions.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 105 
-

  1 file changed, 50 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..6e476b173cbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,46 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };

+static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ unsigned int pasid,
+ unsigned int *vm_pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+    if (vm_pasid)
+    *vm_pasid = pasid;
+


Ok the more I read from this patch the less it makes sense.

We don't allocate the passid here, we just set it up in the idr.

What we could do is to replace the idr with an xarray, that would 
certainly make more sense than this here.



xarray looks great, with that we don't need pasid_lock either.


You still need the lock to protect against VM destruction while 
looking things up, but you could switch to RCU for this instead.



xarray has xa_{lock|unloack}_irqsave() and adev->vm_manager.pasid_xa 
will exist till devices's lifetime.


That's just a wrapper around the lock.


So I am thinking something like:

amdgpu_vm_pasid_insert()

{

...

xa_lock_irqsave(adev->vm_manager.pasids, flags)
r = xa_store(>vm_manager.pasids, pasid, vm, GFP_ATOMIC);
xa_unlock_irqsave(adev->vm_manager.pasids, flags)


It would be really nice if we could avoid the GFP_ATOMIC here, but not 
much of a problem since we had that before.



}

amdgpu_vm_pasid_remove()

{



xa_lock_irqsave(adev->vm_manager.pasids, flags)
xa_erase(>vm_manager.pasids, pasid);
xa_unlock_irqsave(adev->vm_manager.pasids, flags)

}


xa_{lock|unloack}_irqsave() can be use while looking up vm ptr for a 
pasid.



Shouldn't this be enough ?



Yeah I think so.

Christian.



Regards,

Nirmoy



Christian.




Thanks

Nirmoy




Christian.


+    return 0;
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   unsigned int pasid,
+   unsigned int *vm_pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    if (vm_pasid)
+    *vm_pasid = 0;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS
   * happens while holding this lock anywhere to prevent deadlocks 
when
@@ -2940,18 +2980,8 @@ int amdgpu_vm_init(struct amdgpu_device 
*adev, struct amdgpu_vm *vm, u32 pasid)


  amdgpu_bo_unreserve(vm->root.bo);

-    if (pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, 
pasid + 1,

-  GFP_ATOMIC);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-    if (r < 0)
-    goto error_free_root;
-
-    vm->pasid = pasid;
-    }
+    if (amdgpu_vm_pasid_alloc(adev, vm, pasid, >pasid))
+    goto error_free_root;

  INIT_KFIFO(vm->faults);

@@ -3038,19 +3068,11 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

  r = amdgpu_vm_check_clean_reserved(adev, vm);
  if (r)
  goto unreserve_bo;
+    r = amdgpu_vm_pasid_alloc(adev, vm, pasid, NULL);
+    if (r ==  -ENOSPC)
+    goto unreserve_bo;

-    if (pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, 
pasid + 1,

-  GFP_ATOMIC);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
-    if (r == -ENOSPC)
-    goto unreserve_bo;
-    r = 0;
-    }
+    r = 0;

  /* Check if PD needs to be reinitialized and do it before
   * changing any other state, in case it fails.
@@ -3089,35 +3111,23 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

  vm->is_compute_context = true;

  if (vm->pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    idr_remove(>vm_manager.pasid_idr, vm->pasid);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
  /* Free the 

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-22 Thread Das, Nirmoy


On 6/22/2021 10:36 AM, Christian König wrote:

Am 22.06.21 um 09:39 schrieb Das, Nirmoy:


On 6/22/2021 9:03 AM, Christian König wrote:



Am 22.06.21 um 08:57 schrieb Nirmoy Das:

Cleanup code related to vm pasid by adding helper functions.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 105 
-

  1 file changed, 50 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..6e476b173cbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,46 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };

+static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ unsigned int pasid,
+ unsigned int *vm_pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+    if (vm_pasid)
+    *vm_pasid = pasid;
+


Ok the more I read from this patch the less it makes sense.

We don't allocate the passid here, we just set it up in the idr.

What we could do is to replace the idr with an xarray, that would 
certainly make more sense than this here.



xarray looks great, with that we don't need pasid_lock either.


You still need the lock to protect against VM destruction while 
looking things up, but you could switch to RCU for this instead.



xarray has xa_{lock|unloack}_irqsave() and adev->vm_manager.pasid_xa 
will exist till devices's lifetime. So I am thinking something like:


amdgpu_vm_pasid_insert()

{

...

xa_lock_irqsave(adev->vm_manager.pasids, flags)
r = xa_store(>vm_manager.pasids, pasid, vm, GFP_ATOMIC);
xa_unlock_irqsave(adev->vm_manager.pasids, flags)

}

amdgpu_vm_pasid_remove()

{



xa_lock_irqsave(adev->vm_manager.pasids, flags)
xa_erase(>vm_manager.pasids, pasid);
xa_unlock_irqsave(adev->vm_manager.pasids, flags)

}


xa_{lock|unloack}_irqsave() can be use while looking up vm ptr for a pasid.


Shouldn't this be enough ?


Regards,

Nirmoy



Christian.




Thanks

Nirmoy




Christian.


+    return 0;
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   unsigned int pasid,
+   unsigned int *vm_pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    if (vm_pasid)
+    *vm_pasid = 0;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS
   * happens while holding this lock anywhere to prevent deadlocks 
when
@@ -2940,18 +2980,8 @@ int amdgpu_vm_init(struct amdgpu_device 
*adev, struct amdgpu_vm *vm, u32 pasid)


  amdgpu_bo_unreserve(vm->root.bo);

-    if (pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, 
pasid + 1,

-  GFP_ATOMIC);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-    if (r < 0)
-    goto error_free_root;
-
-    vm->pasid = pasid;
-    }
+    if (amdgpu_vm_pasid_alloc(adev, vm, pasid, >pasid))
+    goto error_free_root;

  INIT_KFIFO(vm->faults);

@@ -3038,19 +3068,11 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

  r = amdgpu_vm_check_clean_reserved(adev, vm);
  if (r)
  goto unreserve_bo;
+    r = amdgpu_vm_pasid_alloc(adev, vm, pasid, NULL);
+    if (r ==  -ENOSPC)
+    goto unreserve_bo;

-    if (pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, 
pasid + 1,

-  GFP_ATOMIC);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
-    if (r == -ENOSPC)
-    goto unreserve_bo;
-    r = 0;
-    }
+    r = 0;

  /* Check if PD needs to be reinitialized and do it before
   * changing any other state, in case it fails.
@@ -3089,35 +3111,23 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

  vm->is_compute_context = true;

  if (vm->pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    idr_remove(>vm_manager.pasid_idr, vm->pasid);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
  /* Free the original amdgpu allocated pasid
   * Will be replaced with kfd allocated pasid
   */
  amdgpu_pasid_free(vm->pasid);
-    vm->pasid = 0;
+    amdgpu_vm_pasid_remove(adev, vm->pasid, >pasid);
  }

   

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-22 Thread Christian König

Am 22.06.21 um 09:39 schrieb Das, Nirmoy:


On 6/22/2021 9:03 AM, Christian König wrote:



Am 22.06.21 um 08:57 schrieb Nirmoy Das:

Cleanup code related to vm pasid by adding helper functions.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 105 
-

  1 file changed, 50 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..6e476b173cbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,46 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };

+static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ unsigned int pasid,
+ unsigned int *vm_pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+    if (vm_pasid)
+    *vm_pasid = pasid;
+


Ok the more I read from this patch the less it makes sense.

We don't allocate the passid here, we just set it up in the idr.

What we could do is to replace the idr with an xarray, that would 
certainly make more sense than this here.



xarray looks great, with that we don't need pasid_lock either.


You still need the lock to protect against VM destruction while looking 
things up, but you could switch to RCU for this instead.


Christian.




Thanks

Nirmoy




Christian.


+    return 0;
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   unsigned int pasid,
+   unsigned int *vm_pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    if (vm_pasid)
+    *vm_pasid = 0;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS

   * happens while holding this lock anywhere to prevent deadlocks when
@@ -2940,18 +2980,8 @@ int amdgpu_vm_init(struct amdgpu_device 
*adev, struct amdgpu_vm *vm, u32 pasid)


  amdgpu_bo_unreserve(vm->root.bo);

-    if (pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid 
+ 1,

-  GFP_ATOMIC);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-    if (r < 0)
-    goto error_free_root;
-
-    vm->pasid = pasid;
-    }
+    if (amdgpu_vm_pasid_alloc(adev, vm, pasid, >pasid))
+    goto error_free_root;

  INIT_KFIFO(vm->faults);

@@ -3038,19 +3068,11 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

  r = amdgpu_vm_check_clean_reserved(adev, vm);
  if (r)
  goto unreserve_bo;
+    r = amdgpu_vm_pasid_alloc(adev, vm, pasid, NULL);
+    if (r ==  -ENOSPC)
+    goto unreserve_bo;

-    if (pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid 
+ 1,

-  GFP_ATOMIC);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
-    if (r == -ENOSPC)
-    goto unreserve_bo;
-    r = 0;
-    }
+    r = 0;

  /* Check if PD needs to be reinitialized and do it before
   * changing any other state, in case it fails.
@@ -3089,35 +3111,23 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

  vm->is_compute_context = true;

  if (vm->pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    idr_remove(>vm_manager.pasid_idr, vm->pasid);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
  /* Free the original amdgpu allocated pasid
   * Will be replaced with kfd allocated pasid
   */
  amdgpu_pasid_free(vm->pasid);
-    vm->pasid = 0;
+    amdgpu_vm_pasid_remove(adev, vm->pasid, >pasid);
  }

  /* Free the shadow bo for compute VM */
amdgpu_bo_unref(_amdgpu_bo_vm(vm->root.bo)->shadow);
-
  if (pasid)
  vm->pasid = pasid;

  goto unreserve_bo;

  free_idr:
-    if (pasid) {
-    unsigned long flags;
+    amdgpu_vm_pasid_remove(adev, pasid, NULL);

- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    idr_remove(>vm_manager.pasid_idr, pasid);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-    }
  unreserve_bo:
  amdgpu_bo_unreserve(vm->root.bo);
  return r;
@@ -3133,14 +3143,7 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

   */
  void amdgpu_vm_release_compute(struct amdgpu_device 

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-22 Thread Das, Nirmoy


On 6/22/2021 9:03 AM, Christian König wrote:



Am 22.06.21 um 08:57 schrieb Nirmoy Das:

Cleanup code related to vm pasid by adding helper functions.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 105 -
  1 file changed, 50 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..6e476b173cbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,46 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };

+static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ unsigned int pasid,
+ unsigned int *vm_pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+  GFP_ATOMIC);
+    spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+    if (vm_pasid)
+    *vm_pasid = pasid;
+


Ok the more I read from this patch the less it makes sense.

We don't allocate the passid here, we just set it up in the idr.

What we could do is to replace the idr with an xarray, that would 
certainly make more sense than this here.



xarray looks great, with that we don't need pasid_lock either.


Thanks

Nirmoy




Christian.


+    return 0;
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   unsigned int pasid,
+   unsigned int *vm_pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+    spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    if (vm_pasid)
+    *vm_pasid = 0;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS

   * happens while holding this lock anywhere to prevent deadlocks when
@@ -2940,18 +2980,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, 
struct amdgpu_vm *vm, u32 pasid)


  amdgpu_bo_unreserve(vm->root.bo);

-    if (pasid) {
-    unsigned long flags;
-
-    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid 
+ 1,

-  GFP_ATOMIC);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-    if (r < 0)
-    goto error_free_root;
-
-    vm->pasid = pasid;
-    }
+    if (amdgpu_vm_pasid_alloc(adev, vm, pasid, >pasid))
+    goto error_free_root;

  INIT_KFIFO(vm->faults);

@@ -3038,19 +3068,11 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

  r = amdgpu_vm_check_clean_reserved(adev, vm);
  if (r)
  goto unreserve_bo;
+    r = amdgpu_vm_pasid_alloc(adev, vm, pasid, NULL);
+    if (r ==  -ENOSPC)
+    goto unreserve_bo;

-    if (pasid) {
-    unsigned long flags;
-
-    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid 
+ 1,

-  GFP_ATOMIC);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
-    if (r == -ENOSPC)
-    goto unreserve_bo;
-    r = 0;
-    }
+    r = 0;

  /* Check if PD needs to be reinitialized and do it before
   * changing any other state, in case it fails.
@@ -3089,35 +3111,23 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

  vm->is_compute_context = true;

  if (vm->pasid) {
-    unsigned long flags;
-
-    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    idr_remove(>vm_manager.pasid_idr, vm->pasid);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
  /* Free the original amdgpu allocated pasid
   * Will be replaced with kfd allocated pasid
   */
  amdgpu_pasid_free(vm->pasid);
-    vm->pasid = 0;
+    amdgpu_vm_pasid_remove(adev, vm->pasid, >pasid);
  }

  /* Free the shadow bo for compute VM */
amdgpu_bo_unref(_amdgpu_bo_vm(vm->root.bo)->shadow);
-
  if (pasid)
  vm->pasid = pasid;

  goto unreserve_bo;

  free_idr:
-    if (pasid) {
-    unsigned long flags;
+    amdgpu_vm_pasid_remove(adev, pasid, NULL);

-    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    idr_remove(>vm_manager.pasid_idr, pasid);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-    }
  unreserve_bo:
  amdgpu_bo_unreserve(vm->root.bo);
  return r;
@@ -3133,14 +3143,7 @@ int amdgpu_vm_make_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm,

   */
  void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)

  {
-    if (vm->pasid) {
-    unsigned long flags;
-
-    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    

[PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-22 Thread Nirmoy Das
Cleanup code related to vm pasid by adding helper functions.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 105 -
 1 file changed, 50 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 63975bda8e76..6e476b173cbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,46 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
 };

+static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+struct amdgpu_vm *vm,
+unsigned int pasid,
+unsigned int *vm_pasid)
+{
+   unsigned long flags;
+   int r;
+
+   if (!pasid)
+   return 0;
+
+   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+ GFP_ATOMIC);
+   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+   if (r < 0)
+   return r;
+   if (vm_pasid)
+   *vm_pasid = pasid;
+
+   return 0;
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+  unsigned int pasid,
+  unsigned int *vm_pasid)
+{
+   unsigned long flags;
+
+   if (!pasid)
+   return;
+
+   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+   idr_remove(>vm_manager.pasid_idr, pasid);
+   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+   if (vm_pasid)
+   *vm_pasid = 0;
+}
+
 /*
  * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
  * happens while holding this lock anywhere to prevent deadlocks when
@@ -2940,18 +2980,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm, u32 pasid)

amdgpu_bo_unreserve(vm->root.bo);

-   if (pasid) {
-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
- GFP_ATOMIC);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-   if (r < 0)
-   goto error_free_root;
-
-   vm->pasid = pasid;
-   }
+   if (amdgpu_vm_pasid_alloc(adev, vm, pasid, >pasid))
+   goto error_free_root;

INIT_KFIFO(vm->faults);

@@ -3038,19 +3068,11 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
r = amdgpu_vm_check_clean_reserved(adev, vm);
if (r)
goto unreserve_bo;
+   r = amdgpu_vm_pasid_alloc(adev, vm, pasid, NULL);
+   if (r ==  -ENOSPC)
+   goto unreserve_bo;

-   if (pasid) {
-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
- GFP_ATOMIC);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
-   if (r == -ENOSPC)
-   goto unreserve_bo;
-   r = 0;
-   }
+   r = 0;

/* Check if PD needs to be reinitialized and do it before
 * changing any other state, in case it fails.
@@ -3089,35 +3111,23 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
vm->is_compute_context = true;

if (vm->pasid) {
-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   idr_remove(>vm_manager.pasid_idr, vm->pasid);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
/* Free the original amdgpu allocated pasid
 * Will be replaced with kfd allocated pasid
 */
amdgpu_pasid_free(vm->pasid);
-   vm->pasid = 0;
+   amdgpu_vm_pasid_remove(adev, vm->pasid, >pasid);
}

/* Free the shadow bo for compute VM */
amdgpu_bo_unref(_amdgpu_bo_vm(vm->root.bo)->shadow);
-
if (pasid)
vm->pasid = pasid;

goto unreserve_bo;

 free_idr:
-   if (pasid) {
-   unsigned long flags;
+   amdgpu_vm_pasid_remove(adev, pasid, NULL);

-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   idr_remove(>vm_manager.pasid_idr, pasid);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-   }
 unreserve_bo:
amdgpu_bo_unreserve(vm->root.bo);
return r;
@@ -3133,14 +3143,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
  */
 void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm 
*vm)
 {
-   if (vm->pasid) {
-   unsigned long flags;
-
-  

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-21 Thread Das, Nirmoy


On 6/21/2021 1:35 PM, Christian König wrote:

Am 21.06.21 um 13:27 schrieb Das, Nirmoy:


On 6/21/2021 1:18 PM, Christian König wrote:

Am 21.06.21 um 13:10 schrieb Das, Nirmoy:


On 6/21/2021 12:59 PM, Christian König wrote:

Am 21.06.21 um 12:56 schrieb Das, Nirmoy:


On 6/21/2021 12:26 PM, Christian König wrote:
Well you completely break the handling in 
amdgpu_vm_handle_fault() with this.



I see one issue with:  if (!vm || (vm && vm->root.bo != root)) . 
I will split it and resend.


Am I missing something else ?


The problem is you drop and re-take the lock now at the wrong time.



I see the problem.



I don't think what you try to do here is possible at all.



Does it makes sense to resend without amdgpu_vm_handle_fault() 
changes ?


Some other changes don't make sense to me as well.

For example:

pasid = amdgpu_pasid_alloc(16);

Why do you want to allocate a hard coded pasid number here?



This is from  original amdgpu_driver_open_kms(). We are allocating a 
pasid number and passing that to


amdgpu_vm_init(). I wanted to move that to vmcode with this patch.


That doesn't make sense.

The pasid is a hardware identify which is unrelated to the VM in 
general and might at some point passed in from external.




Ok, makes sense. I will remove that part then.


Nirmoy


Please keep that as parameter to the VM code if possible.

Christian.




Regards,

Nirmoy




Christian.






Christian.




Regards,

Nirmoy





Christian.

Am 21.06.21 um 11:47 schrieb Das, Nirmoy:

ping.

On 6/17/2021 3:03 PM, Nirmoy Das wrote:

Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 


  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
  3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  {
  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_fpriv *fpriv;
-    int r, pasid;
+    int r;
    /* Ensure IB tests are run on ring */
flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  goto out_suspend;
  }
  -    pasid = amdgpu_pasid_alloc(16);
-    if (pasid < 0) {
-    dev_warn(adev->dev, "No more PASIDs available!");
-    pasid = 0;
-    }
-
-    r = amdgpu_vm_init(adev, >vm, pasid);
+    r = amdgpu_vm_init(adev, >vm);
  if (r)
-    goto error_pasid;
+    goto free_fpriv;
    fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
  if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  error_vm:
  amdgpu_vm_fini(adev, >vm);
  -error_pasid:
-    if (pasid)
-    amdgpu_pasid_free(pasid);
-
+free_fpriv:
  kfree(fpriv);
    out_suspend:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };
  +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, unsigned int pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, 
pasid + 1,

+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+
+    vm->pasid = pasid;
+    return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device 
*adev,

+  unsigned int pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+ idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   struct amdgpu_vm *vm)
+{
+    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+    vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+    if (!vm->pasid)
+    return;
+
+    amdgpu_pasid_free(vm->pasid);
+    amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct 
amdgpu_device *adev,

+  unsigned int pasid)
+{
+    struct amdgpu_vm *vm;
+    unsigned long flags;
+
+ 

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-21 Thread Christian König

Am 21.06.21 um 13:27 schrieb Das, Nirmoy:


On 6/21/2021 1:18 PM, Christian König wrote:

Am 21.06.21 um 13:10 schrieb Das, Nirmoy:


On 6/21/2021 12:59 PM, Christian König wrote:

Am 21.06.21 um 12:56 schrieb Das, Nirmoy:


On 6/21/2021 12:26 PM, Christian König wrote:
Well you completely break the handling in 
amdgpu_vm_handle_fault() with this.



I see one issue with:  if (!vm || (vm && vm->root.bo != root)) . I 
will split it and resend.


Am I missing something else ?


The problem is you drop and re-take the lock now at the wrong time.



I see the problem.



I don't think what you try to do here is possible at all.



Does it makes sense to resend without amdgpu_vm_handle_fault() 
changes ?


Some other changes don't make sense to me as well.

For example:

pasid = amdgpu_pasid_alloc(16);

Why do you want to allocate a hard coded pasid number here?



This is from  original amdgpu_driver_open_kms(). We are allocating a 
pasid number and passing that to


amdgpu_vm_init(). I wanted to move that to vmcode with this patch.


That doesn't make sense.

The pasid is a hardware identify which is unrelated to the VM in general 
and might at some point passed in from external.


Please keep that as parameter to the VM code if possible.

Christian.




Regards,

Nirmoy




Christian.






Christian.




Regards,

Nirmoy





Christian.

Am 21.06.21 um 11:47 schrieb Das, Nirmoy:

ping.

On 6/17/2021 3:03 PM, Nirmoy Das wrote:

Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 


  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
  3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  {
  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_fpriv *fpriv;
-    int r, pasid;
+    int r;
    /* Ensure IB tests are run on ring */
flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  goto out_suspend;
  }
  -    pasid = amdgpu_pasid_alloc(16);
-    if (pasid < 0) {
-    dev_warn(adev->dev, "No more PASIDs available!");
-    pasid = 0;
-    }
-
-    r = amdgpu_vm_init(adev, >vm, pasid);
+    r = amdgpu_vm_init(adev, >vm);
  if (r)
-    goto error_pasid;
+    goto free_fpriv;
    fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
  if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  error_vm:
  amdgpu_vm_fini(adev, >vm);
  -error_pasid:
-    if (pasid)
-    amdgpu_pasid_free(pasid);
-
+free_fpriv:
  kfree(fpriv);
    out_suspend:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };
  +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, unsigned int pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, 
pasid + 1,

+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+
+    vm->pasid = pasid;
+    return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
+  unsigned int pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   struct amdgpu_vm *vm)
+{
+    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+    vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+    if (!vm->pasid)
+    return;
+
+    amdgpu_pasid_free(vm->pasid);
+    amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct 
amdgpu_device *adev,

+  unsigned int pasid)
+{
+    struct amdgpu_vm *vm;
+    unsigned long flags;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    vm = idr_find(>vm_manager.pasid_idr, pasid);
+ 

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-21 Thread Das, Nirmoy


On 6/21/2021 1:18 PM, Christian König wrote:

Am 21.06.21 um 13:10 schrieb Das, Nirmoy:


On 6/21/2021 12:59 PM, Christian König wrote:

Am 21.06.21 um 12:56 schrieb Das, Nirmoy:


On 6/21/2021 12:26 PM, Christian König wrote:
Well you completely break the handling in amdgpu_vm_handle_fault() 
with this.



I see one issue with:  if (!vm || (vm && vm->root.bo != root)) . I 
will split it and resend.


Am I missing something else ?


The problem is you drop and re-take the lock now at the wrong time.



I see the problem.



I don't think what you try to do here is possible at all.



Does it makes sense to resend without amdgpu_vm_handle_fault() changes ?


Some other changes don't make sense to me as well.

For example:

pasid = amdgpu_pasid_alloc(16);

Why do you want to allocate a hard coded pasid number here?



This is from  original amdgpu_driver_open_kms(). We are allocating a 
pasid number and passing that to


amdgpu_vm_init(). I wanted to move that to vmcode with this patch.


Regards,

Nirmoy




Christian.






Christian.




Regards,

Nirmoy





Christian.

Am 21.06.21 um 11:47 schrieb Das, Nirmoy:

ping.

On 6/17/2021 3:03 PM, Nirmoy Das wrote:

Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 


  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
  3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  {
  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_fpriv *fpriv;
-    int r, pasid;
+    int r;
    /* Ensure IB tests are run on ring */
flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  goto out_suspend;
  }
  -    pasid = amdgpu_pasid_alloc(16);
-    if (pasid < 0) {
-    dev_warn(adev->dev, "No more PASIDs available!");
-    pasid = 0;
-    }
-
-    r = amdgpu_vm_init(adev, >vm, pasid);
+    r = amdgpu_vm_init(adev, >vm);
  if (r)
-    goto error_pasid;
+    goto free_fpriv;
    fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
  if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  error_vm:
  amdgpu_vm_fini(adev, >vm);
  -error_pasid:
-    if (pasid)
-    amdgpu_pasid_free(pasid);
-
+free_fpriv:
  kfree(fpriv);
    out_suspend:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };
  +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, unsigned int pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid 
+ 1,

+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+
+    vm->pasid = pasid;
+    return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
+  unsigned int pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   struct amdgpu_vm *vm)
+{
+    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+    vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+    if (!vm->pasid)
+    return;
+
+    amdgpu_pasid_free(vm->pasid);
+    amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct 
amdgpu_device *adev,

+  unsigned int pasid)
+{
+    struct amdgpu_vm *vm;
+    unsigned long flags;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    vm = idr_find(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    return vm;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure 
no reclaim-FS
   * happens while holding this lock anywhere to prevent 
deadlocks when
@@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct 
amdgpu_vm *vm, 

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-21 Thread Das, Nirmoy


On 6/21/2021 12:59 PM, Christian König wrote:

Am 21.06.21 um 12:56 schrieb Das, Nirmoy:


On 6/21/2021 12:26 PM, Christian König wrote:
Well you completely break the handling in amdgpu_vm_handle_fault() 
with this.



I see one issue with:  if (!vm || (vm && vm->root.bo != root)) . I 
will split it and resend.


Am I missing something else ?


The problem is you drop and re-take the lock now at the wrong time.



I see the problem.



I don't think what you try to do here is possible at all.



Does it makes sense to resend without amdgpu_vm_handle_fault() changes ?



Christian.




Regards,

Nirmoy





Christian.

Am 21.06.21 um 11:47 schrieb Das, Nirmoy:

ping.

On 6/17/2021 3:03 PM, Nirmoy Das wrote:

Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 


  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
  3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

  {
  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_fpriv *fpriv;
-    int r, pasid;
+    int r;
    /* Ensure IB tests are run on ring */
  flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  goto out_suspend;
  }
  -    pasid = amdgpu_pasid_alloc(16);
-    if (pasid < 0) {
-    dev_warn(adev->dev, "No more PASIDs available!");
-    pasid = 0;
-    }
-
-    r = amdgpu_vm_init(adev, >vm, pasid);
+    r = amdgpu_vm_init(adev, >vm);
  if (r)
-    goto error_pasid;
+    goto free_fpriv;
    fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
  if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  error_vm:
  amdgpu_vm_fini(adev, >vm);
  -error_pasid:
-    if (pasid)
-    amdgpu_pasid_free(pasid);
-
+free_fpriv:
  kfree(fpriv);
    out_suspend:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };
  +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, unsigned int pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+
+    vm->pasid = pasid;
+    return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
+  unsigned int pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   struct amdgpu_vm *vm)
+{
+    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+    vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+    if (!vm->pasid)
+    return;
+
+    amdgpu_pasid_free(vm->pasid);
+    amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct 
amdgpu_device *adev,

+  unsigned int pasid)
+{
+    struct amdgpu_vm *vm;
+    unsigned long flags;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    vm = idr_find(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    return vm;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS
   * happens while holding this lock anywhere to prevent deadlocks 
when
@@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm 
*vm, long timeout)

   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
- * @pasid: Process address space identifier
   *
   * Init @vm fields.
   *
   * Returns:
   * 0 for success, error for failure.
   */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm 
*vm, u32 pasid)

+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  {
  struct amdgpu_bo *root_bo;
  struct amdgpu_bo_vm *root;
+    unsigned int 

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-21 Thread Christian König

Am 21.06.21 um 12:56 schrieb Das, Nirmoy:


On 6/21/2021 12:26 PM, Christian König wrote:
Well you completely break the handling in amdgpu_vm_handle_fault() 
with this.



I see one issue with:  if (!vm || (vm && vm->root.bo != root)) . I 
will split it and resend.


Am I missing something else ?


The problem is you drop and re-take the lock now at the wrong time.

I don't think what you try to do here is possible at all.

Christian.




Regards,

Nirmoy





Christian.

Am 21.06.21 um 11:47 schrieb Das, Nirmoy:

ping.

On 6/17/2021 3:03 PM, Nirmoy Das wrote:

Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 


  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
  3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

  {
  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_fpriv *fpriv;
-    int r, pasid;
+    int r;
    /* Ensure IB tests are run on ring */
  flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

  goto out_suspend;
  }
  -    pasid = amdgpu_pasid_alloc(16);
-    if (pasid < 0) {
-    dev_warn(adev->dev, "No more PASIDs available!");
-    pasid = 0;
-    }
-
-    r = amdgpu_vm_init(adev, >vm, pasid);
+    r = amdgpu_vm_init(adev, >vm);
  if (r)
-    goto error_pasid;
+    goto free_fpriv;
    fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
  if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

  error_vm:
  amdgpu_vm_fini(adev, >vm);
  -error_pasid:
-    if (pasid)
-    amdgpu_pasid_free(pasid);
-
+free_fpriv:
  kfree(fpriv);
    out_suspend:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };
  +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, unsigned int pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+
+    vm->pasid = pasid;
+    return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
+  unsigned int pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   struct amdgpu_vm *vm)
+{
+    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+    vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+    if (!vm->pasid)
+    return;
+
+    amdgpu_pasid_free(vm->pasid);
+    amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct amdgpu_device 
*adev,

+  unsigned int pasid)
+{
+    struct amdgpu_vm *vm;
+    unsigned long flags;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    vm = idr_find(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    return vm;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS
   * happens while holding this lock anywhere to prevent deadlocks 
when
@@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm 
*vm, long timeout)

   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
- * @pasid: Process address space identifier
   *
   * Init @vm fields.
   *
   * Returns:
   * 0 for success, error for failure.
   */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm 
*vm, u32 pasid)

+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  {
  struct amdgpu_bo *root_bo;
  struct amdgpu_bo_vm *root;
+    unsigned int pasid;
  int r, i;
    vm->va = RB_ROOT_CACHED;
@@ -2940,19 +3003,15 @@ int amdgpu_vm_init(struct amdgpu_device 
*adev, struct amdgpu_vm 

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-21 Thread Das, Nirmoy


On 6/21/2021 12:26 PM, Christian König wrote:
Well you completely break the handling in amdgpu_vm_handle_fault() 
with this.



I see one issue with:  if (!vm || (vm && vm->root.bo != root)) . I will 
split it and resend.


Am I missing something else ?


Regards,

Nirmoy





Christian.

Am 21.06.21 um 11:47 schrieb Das, Nirmoy:

ping.

On 6/17/2021 3:03 PM, Nirmoy Das wrote:

Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 


  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
  3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

  {
  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_fpriv *fpriv;
-    int r, pasid;
+    int r;
    /* Ensure IB tests are run on ring */
  flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

  goto out_suspend;
  }
  -    pasid = amdgpu_pasid_alloc(16);
-    if (pasid < 0) {
-    dev_warn(adev->dev, "No more PASIDs available!");
-    pasid = 0;
-    }
-
-    r = amdgpu_vm_init(adev, >vm, pasid);
+    r = amdgpu_vm_init(adev, >vm);
  if (r)
-    goto error_pasid;
+    goto free_fpriv;
    fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
  if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

  error_vm:
  amdgpu_vm_fini(adev, >vm);
  -error_pasid:
-    if (pasid)
-    amdgpu_pasid_free(pasid);
-
+free_fpriv:
  kfree(fpriv);
    out_suspend:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };
  +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, unsigned int pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+
+    vm->pasid = pasid;
+    return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
+  unsigned int pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   struct amdgpu_vm *vm)
+{
+    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+    vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+    if (!vm->pasid)
+    return;
+
+    amdgpu_pasid_free(vm->pasid);
+    amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct amdgpu_device 
*adev,

+  unsigned int pasid)
+{
+    struct amdgpu_vm *vm;
+    unsigned long flags;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    vm = idr_find(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    return vm;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS

   * happens while holding this lock anywhere to prevent deadlocks when
@@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm 
*vm, long timeout)

   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
- * @pasid: Process address space identifier
   *
   * Init @vm fields.
   *
   * Returns:
   * 0 for success, error for failure.
   */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm 
*vm, u32 pasid)

+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  {
  struct amdgpu_bo *root_bo;
  struct amdgpu_bo_vm *root;
+    unsigned int pasid;
  int r, i;
    vm->va = RB_ROOT_CACHED;
@@ -2940,19 +3003,15 @@ int amdgpu_vm_init(struct amdgpu_device 
*adev, struct amdgpu_vm *vm, u32 pasid)

    amdgpu_bo_unreserve(vm->root.bo);
  -    if (pasid) {
-    unsigned long flags;
-
- spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = 

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-21 Thread Das, Nirmoy

ping.

On 6/17/2021 3:03 PM, Nirmoy Das wrote:

Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
  3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct 
drm_file *file_priv)
  {
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv;
-   int r, pasid;
+   int r;
  
  	/* Ensure IB tests are run on ring */

flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct drm_device *dev, 
struct drm_file *file_priv)
goto out_suspend;
}
  
-	pasid = amdgpu_pasid_alloc(16);

-   if (pasid < 0) {
-   dev_warn(adev->dev, "No more PASIDs available!");
-   pasid = 0;
-   }
-
-   r = amdgpu_vm_init(adev, >vm, pasid);
+   r = amdgpu_vm_init(adev, >vm);
if (r)
-   goto error_pasid;
+   goto free_fpriv;
  
  	fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);

if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, 
struct drm_file *file_priv)
  error_vm:
amdgpu_vm_fini(adev, >vm);
  
-error_pasid:

-   if (pasid)
-   amdgpu_pasid_free(pasid);
-
+free_fpriv:
kfree(fpriv);
  
  out_suspend:

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
  };
  
+static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,

+struct amdgpu_vm *vm, unsigned int pasid)
+{
+   unsigned long flags;
+   int r;
+
+   if (!pasid)
+   return 0;
+
+   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+ GFP_ATOMIC);
+   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+   if (r < 0)
+   return r;
+
+   vm->pasid = pasid;
+   return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
+ unsigned int pasid)
+{
+   unsigned long flags;
+
+   if (!pasid)
+   return;
+
+   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+   idr_remove(>vm_manager.pasid_idr, pasid);
+   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+  struct amdgpu_vm *vm)
+{
+   amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+   vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+struct amdgpu_vm *vm)
+{
+   if (!vm->pasid)
+   return;
+
+   amdgpu_pasid_free(vm->pasid);
+   amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct amdgpu_device *adev,
+ unsigned int pasid)
+{
+   struct amdgpu_vm *vm;
+   unsigned long flags;
+
+   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+   vm = idr_find(>vm_manager.pasid_idr, pasid);
+   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+   return vm;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
   * happens while holding this lock anywhere to prevent deadlocks when
@@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long 
timeout)
   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
- * @pasid: Process address space identifier
   *
   * Init @vm fields.
   *
   * Returns:
   * 0 for success, error for failure.
   */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  {
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
+   unsigned int pasid;
int r, i;
  
  	vm->va = RB_ROOT_CACHED;

@@ -2940,19 +3003,15 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm, u32 pasid)
  
  	amdgpu_bo_unreserve(vm->root.bo);
  
-	if (pasid) {

-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, 

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-21 Thread Christian König

Am 21.06.21 um 13:10 schrieb Das, Nirmoy:


On 6/21/2021 12:59 PM, Christian König wrote:

Am 21.06.21 um 12:56 schrieb Das, Nirmoy:


On 6/21/2021 12:26 PM, Christian König wrote:
Well you completely break the handling in amdgpu_vm_handle_fault() 
with this.



I see one issue with:  if (!vm || (vm && vm->root.bo != root)) . I 
will split it and resend.


Am I missing something else ?


The problem is you drop and re-take the lock now at the wrong time.



I see the problem.



I don't think what you try to do here is possible at all.



Does it makes sense to resend without amdgpu_vm_handle_fault() changes ?


Some other changes don't make sense to me as well.

For example:

pasid = amdgpu_pasid_alloc(16);

Why do you want to allocate a hard coded pasid number here?

Christian.






Christian.




Regards,

Nirmoy





Christian.

Am 21.06.21 um 11:47 schrieb Das, Nirmoy:

ping.

On 6/17/2021 3:03 PM, Nirmoy Das wrote:

Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 


  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
  3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  {
  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_fpriv *fpriv;
-    int r, pasid;
+    int r;
    /* Ensure IB tests are run on ring */
flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  goto out_suspend;
  }
  -    pasid = amdgpu_pasid_alloc(16);
-    if (pasid < 0) {
-    dev_warn(adev->dev, "No more PASIDs available!");
-    pasid = 0;
-    }
-
-    r = amdgpu_vm_init(adev, >vm, pasid);
+    r = amdgpu_vm_init(adev, >vm);
  if (r)
-    goto error_pasid;
+    goto free_fpriv;
    fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
  if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  error_vm:
  amdgpu_vm_fini(adev, >vm);
  -error_pasid:
-    if (pasid)
-    amdgpu_pasid_free(pasid);
-
+free_fpriv:
  kfree(fpriv);
    out_suspend:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };
  +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, unsigned int pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid 
+ 1,

+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+
+    vm->pasid = pasid;
+    return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
+  unsigned int pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   struct amdgpu_vm *vm)
+{
+    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+    vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+    if (!vm->pasid)
+    return;
+
+    amdgpu_pasid_free(vm->pasid);
+    amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct 
amdgpu_device *adev,

+  unsigned int pasid)
+{
+    struct amdgpu_vm *vm;
+    unsigned long flags;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    vm = idr_find(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    return vm;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS
   * happens while holding this lock anywhere to prevent 
deadlocks when
@@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm 
*vm, long timeout)

   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
- * @pasid: Process address space identifier
   *
   * Init @vm fields.
   *
   * Returns:
   * 0 for success, error for failure.
   */
-int amdgpu_vm_init(struct 

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-21 Thread Christian König



Am 21.06.21 um 13:07 schrieb Das, Nirmoy:


On 6/21/2021 12:59 PM, Christian König wrote:

Am 21.06.21 um 12:56 schrieb Das, Nirmoy:


On 6/21/2021 12:26 PM, Christian König wrote:
Well you completely break the handling in amdgpu_vm_handle_fault() 
with this.



I see one issue with:  if (!vm || (vm && vm->root.bo != root)) . I 
will split it and resend.


Am I missing something else ?


The problem is you drop and re-take the lock now at the wrong time.



The original code is also dropping and retaking the lock.  I don't 
understand the difference.



spin_lock_irqsave(>vm_manager.pasid_lock, irqflags);
vm = idr_find(>vm_manager.pasid_idr, pasid);
if (vm) {
    root = amdgpu_bo_ref(vm->root.bo);
    is_compute_context = vm->is_compute_context;
} else {
    root = NULL;
}
spin_unlock_irqrestore(>vm_manager.pasid_lock, irqflags);


What this code does is not only looking up the VM, but also taking a 
reference to the root PD.


This must both be done while holding the lock.

Christian.




Regards,

Nirmoy



I don't think what you try to do here is possible at all.

Christian.




Regards,

Nirmoy





Christian.

Am 21.06.21 um 11:47 schrieb Das, Nirmoy:

ping.

On 6/17/2021 3:03 PM, Nirmoy Das wrote:

Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 


  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
  3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  {
  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_fpriv *fpriv;
-    int r, pasid;
+    int r;
    /* Ensure IB tests are run on ring */
flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  goto out_suspend;
  }
  -    pasid = amdgpu_pasid_alloc(16);
-    if (pasid < 0) {
-    dev_warn(adev->dev, "No more PASIDs available!");
-    pasid = 0;
-    }
-
-    r = amdgpu_vm_init(adev, >vm, pasid);
+    r = amdgpu_vm_init(adev, >vm);
  if (r)
-    goto error_pasid;
+    goto free_fpriv;
    fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
  if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  error_vm:
  amdgpu_vm_fini(adev, >vm);
  -error_pasid:
-    if (pasid)
-    amdgpu_pasid_free(pasid);
-
+free_fpriv:
  kfree(fpriv);
    out_suspend:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };
  +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, unsigned int pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid 
+ 1,

+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+
+    vm->pasid = pasid;
+    return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
+  unsigned int pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   struct amdgpu_vm *vm)
+{
+    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+    vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+    if (!vm->pasid)
+    return;
+
+    amdgpu_pasid_free(vm->pasid);
+    amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct 
amdgpu_device *adev,

+  unsigned int pasid)
+{
+    struct amdgpu_vm *vm;
+    unsigned long flags;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    vm = idr_find(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    return vm;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS
   * happens while holding this lock anywhere to prevent 
deadlocks when
@@ -2859,17 +2922,17 @@ 

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-21 Thread Das, Nirmoy


On 6/21/2021 12:59 PM, Christian König wrote:

Am 21.06.21 um 12:56 schrieb Das, Nirmoy:


On 6/21/2021 12:26 PM, Christian König wrote:
Well you completely break the handling in amdgpu_vm_handle_fault() 
with this.



I see one issue with:  if (!vm || (vm && vm->root.bo != root)) . I 
will split it and resend.


Am I missing something else ?


The problem is you drop and re-take the lock now at the wrong time.



The original code is also dropping and retaking the lock.  I don't 
understand the difference.



Regards,

Nirmoy



I don't think what you try to do here is possible at all.

Christian.




Regards,

Nirmoy





Christian.

Am 21.06.21 um 11:47 schrieb Das, Nirmoy:

ping.

On 6/17/2021 3:03 PM, Nirmoy Das wrote:

Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 


  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
  3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

  {
  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_fpriv *fpriv;
-    int r, pasid;
+    int r;
    /* Ensure IB tests are run on ring */
  flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  goto out_suspend;
  }
  -    pasid = amdgpu_pasid_alloc(16);
-    if (pasid < 0) {
-    dev_warn(adev->dev, "No more PASIDs available!");
-    pasid = 0;
-    }
-
-    r = amdgpu_vm_init(adev, >vm, pasid);
+    r = amdgpu_vm_init(adev, >vm);
  if (r)
-    goto error_pasid;
+    goto free_fpriv;
    fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
  if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct 
drm_device *dev, struct drm_file *file_priv)

  error_vm:
  amdgpu_vm_fini(adev, >vm);
  -error_pasid:
-    if (pasid)
-    amdgpu_pasid_free(pasid);
-
+free_fpriv:
  kfree(fpriv);
    out_suspend:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };
  +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, unsigned int pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+  GFP_ATOMIC);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+
+    vm->pasid = pasid;
+    return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
+  unsigned int pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   struct amdgpu_vm *vm)
+{
+    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+    vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+    if (!vm->pasid)
+    return;
+
+    amdgpu_pasid_free(vm->pasid);
+    amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct 
amdgpu_device *adev,

+  unsigned int pasid)
+{
+    struct amdgpu_vm *vm;
+    unsigned long flags;
+
+ spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    vm = idr_find(>vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    return vm;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS
   * happens while holding this lock anywhere to prevent deadlocks 
when
@@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm 
*vm, long timeout)

   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
- * @pasid: Process address space identifier
   *
   * Init @vm fields.
   *
   * Returns:
   * 0 for success, error for failure.
   */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm 
*vm, u32 pasid)

+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  {
  struct amdgpu_bo *root_bo;
  struct amdgpu_bo_vm *root;
+    

Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-21 Thread Christian König
Well you completely break the handling in amdgpu_vm_handle_fault() with 
this.


Christian.

Am 21.06.21 um 11:47 schrieb Das, Nirmoy:

ping.

On 6/17/2021 3:03 PM, Nirmoy Das wrote:

Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
  3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

  {
  struct amdgpu_device *adev = drm_to_adev(dev);
  struct amdgpu_fpriv *fpriv;
-    int r, pasid;
+    int r;
    /* Ensure IB tests are run on ring */
  flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

  goto out_suspend;
  }
  -    pasid = amdgpu_pasid_alloc(16);
-    if (pasid < 0) {
-    dev_warn(adev->dev, "No more PASIDs available!");
-    pasid = 0;
-    }
-
-    r = amdgpu_vm_init(adev, >vm, pasid);
+    r = amdgpu_vm_init(adev, >vm);
  if (r)
-    goto error_pasid;
+    goto free_fpriv;
    fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
  if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

  error_vm:
  amdgpu_vm_fini(adev, >vm);
  -error_pasid:
-    if (pasid)
-    amdgpu_pasid_free(pasid);
-
+free_fpriv:
  kfree(fpriv);
    out_suspend:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
  struct dma_fence_cb cb;
  };
  +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, unsigned int pasid)
+{
+    unsigned long flags;
+    int r;
+
+    if (!pasid)
+    return 0;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+  GFP_ATOMIC);
+    spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+    if (r < 0)
+    return r;
+
+    vm->pasid = pasid;
+    return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
+  unsigned int pasid)
+{
+    unsigned long flags;
+
+    if (!pasid)
+    return;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    idr_remove(>vm_manager.pasid_idr, pasid);
+    spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+   struct amdgpu_vm *vm)
+{
+    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+    vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+    if (!vm->pasid)
+    return;
+
+    amdgpu_pasid_free(vm->pasid);
+    amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct amdgpu_device 
*adev,

+  unsigned int pasid)
+{
+    struct amdgpu_vm *vm;
+    unsigned long flags;
+
+    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+    vm = idr_find(>vm_manager.pasid_idr, pasid);
+    spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+    return vm;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no 
reclaim-FS

   * happens while holding this lock anywhere to prevent deadlocks when
@@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm 
*vm, long timeout)

   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
- * @pasid: Process address space identifier
   *
   * Init @vm fields.
   *
   * Returns:
   * 0 for success, error for failure.
   */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, 
u32 pasid)

+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  {
  struct amdgpu_bo *root_bo;
  struct amdgpu_bo_vm *root;
+    unsigned int pasid;
  int r, i;
    vm->va = RB_ROOT_CACHED;
@@ -2940,19 +3003,15 @@ int amdgpu_vm_init(struct amdgpu_device 
*adev, struct amdgpu_vm *vm, u32 pasid)

    amdgpu_bo_unreserve(vm->root.bo);
  -    if (pasid) {
-    unsigned long flags;
-
-    spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-    r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid 
+ 1,

-  GFP_ATOMIC);
- spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-    if (r < 0)
-    goto 

[PATCH 1/1] drm/amdgpu: add helper function for vm pasid

2021-06-17 Thread Nirmoy Das
Cleanup code related to vm pasid by adding helper function.
This reduces lots code duplication.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
 3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index cbb932f97355..27851fb0e25b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct 
drm_file *file_priv)
 {
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv;
-   int r, pasid;
+   int r;
 
/* Ensure IB tests are run on ring */
flush_delayed_work(>delayed_init_work);
@@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct drm_device *dev, 
struct drm_file *file_priv)
goto out_suspend;
}
 
-   pasid = amdgpu_pasid_alloc(16);
-   if (pasid < 0) {
-   dev_warn(adev->dev, "No more PASIDs available!");
-   pasid = 0;
-   }
-
-   r = amdgpu_vm_init(adev, >vm, pasid);
+   r = amdgpu_vm_init(adev, >vm);
if (r)
-   goto error_pasid;
+   goto free_fpriv;
 
fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
if (!fpriv->prt_va) {
@@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, 
struct drm_file *file_priv)
 error_vm:
amdgpu_vm_fini(adev, >vm);
 
-error_pasid:
-   if (pasid)
-   amdgpu_pasid_free(pasid);
-
+free_fpriv:
kfree(fpriv);
 
 out_suspend:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 63975bda8e76..562c2c48a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
 };
 
+static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
+struct amdgpu_vm *vm, unsigned int pasid)
+{
+   unsigned long flags;
+   int r;
+
+   if (!pasid)
+   return 0;
+
+   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
+ GFP_ATOMIC);
+   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+   if (r < 0)
+   return r;
+
+   vm->pasid = pasid;
+   return 0;
+}
+static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
+ unsigned int pasid)
+{
+   unsigned long flags;
+
+   if (!pasid)
+   return;
+
+   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+   idr_remove(>vm_manager.pasid_idr, pasid);
+   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+}
+
+static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
+  struct amdgpu_vm *vm)
+{
+   amdgpu_vm_pasid_remove_id(adev, vm->pasid);
+   vm->pasid = 0;
+}
+
+static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
+struct amdgpu_vm *vm)
+{
+   if (!vm->pasid)
+   return;
+
+   amdgpu_pasid_free(vm->pasid);
+   amdgpu_vm_pasid_remove(adev, vm);
+}
+
+static struct amdgpu_vm *amdgpu_vm_pasid_find(struct amdgpu_device *adev,
+ unsigned int pasid)
+{
+   struct amdgpu_vm *vm;
+   unsigned long flags;
+
+   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
+   vm = idr_find(>vm_manager.pasid_idr, pasid);
+   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+
+   return vm;
+}
+
 /*
  * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
  * happens while holding this lock anywhere to prevent deadlocks when
@@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long 
timeout)
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
- * @pasid: Process address space identifier
  *
  * Init @vm fields.
  *
  * Returns:
  * 0 for success, error for failure.
  */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
+   unsigned int pasid;
int r, i;
 
vm->va = RB_ROOT_CACHED;
@@ -2940,19 +3003,15 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm, u32 pasid)
 
amdgpu_bo_unreserve(vm->root.bo);
 
-   if (pasid) {
-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
- GFP_ATOMIC);
-