[PATCH 1/2] drm/amd/pm: Add I2C quirk table to Aldebaran

2021-07-02 Thread Luben Tuikov
Add I2C quirk table to Aldebaran.

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Lijo Lazar 
Cc: John Clements 
Cc: Hawking Zhang 
Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index f30edf862b86d1..c1c7aefa9d8fdc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1509,6 +1509,14 @@ static const struct i2c_algorithm aldebaran_i2c_algo = {
.functionality = aldebaran_i2c_func,
 };
 
+static const struct i2c_adapter_quirks aldebaran_i2c_control_quirks = {
+   .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR,
+   .max_read_len  = MAX_SW_I2C_COMMANDS,
+   .max_write_len = MAX_SW_I2C_COMMANDS,
+   .max_comb_1st_msg_len = 2,
+   .max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2,
+};
+
 static int aldebaran_i2c_control_init(struct smu_context *smu, struct 
i2c_adapter *control)
 {
struct amdgpu_device *adev = to_amdgpu_device(control);
@@ -1519,6 +1527,7 @@ static int aldebaran_i2c_control_init(struct smu_context 
*smu, struct i2c_adapte
control->dev.parent = >pdev->dev;
control->algo = _i2c_algo;
snprintf(control->name, sizeof(control->name), "AMDGPU SMU");
+   control->quirks = _i2c_control_quirks;
 
res = i2c_add_adapter(control);
if (res)

base-commit: 81dfdb3e6a907ced8f915da3c65632f7a1616985
-- 
2.32.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: The I2C IP doesn't support 0 writes/reads

2021-07-02 Thread Luben Tuikov
The I2C IP doesn't support writes or reads of 0 bytes.

In order for a START/STOP transaction to take
place on the bus, the data written/read has to be
at least one byte.

That is, you cannot generate a write with 0 bytes,
just to get the ACK from a device, just so you can
probe that device if it is on the bus and so to
discover all devices on the bus--you'll have to
read at least one byte. Writes of 0 bytes generate
no START/STOP on this I2C IP--the bus is not
engaged at all.

Set the I2C_AQ_NO_ZERO_LEN to the existing I2C
quirk tables for Aldebaran, Arcturus, Navi10 and
Sienna Cichlid, and add a quirk table to the I2C
driver which drives the bus when the SMU
doesn't--for instance on Vega20.

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Lijo Lazar 
Cc: John Clements 
Cc: Hawking Zhang 
Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c  | 5 +
 drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c   | 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c  | 2 +-
 5 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c 
b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
index 7d74d6204d8d0a..73ffa8fde3df3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
@@ -702,6 +702,10 @@ static const struct i2c_algorithm smu_v11_0_i2c_algo = {
.functionality = smu_v11_0_i2c_func,
 };
 
+static const struct i2c_adapter_quirks smu_v11_0_i2c_control_quirks = {
+   .flags = I2C_AQ_NO_ZERO_LEN,
+};
+
 int smu_v11_0_i2c_control_init(struct i2c_adapter *control)
 {
struct amdgpu_device *adev = to_amdgpu_device(control);
@@ -714,6 +718,7 @@ int smu_v11_0_i2c_control_init(struct i2c_adapter *control)
control->algo = _v11_0_i2c_algo;
snprintf(control->name, sizeof(control->name), "AMDGPU SMU");
control->lock_ops = _v11_0_i2c_i2c_lock_ops;
+   control->quirks = _v11_0_i2c_control_quirks;
 
res = i2c_add_adapter(control);
if (res)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index b1adbe17b6c2d4..6b3e0ea10163a1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -2022,7 +2022,7 @@ static const struct i2c_algorithm arcturus_i2c_algo = {
 
 
 static const struct i2c_adapter_quirks arcturus_i2c_control_quirks = {
-   .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR,
+   .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN,
.max_read_len  = MAX_SW_I2C_COMMANDS,
.max_write_len = MAX_SW_I2C_COMMANDS,
.max_comb_1st_msg_len = 2,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 36264b78199620..59ea59acfb00ff 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -2820,7 +2820,7 @@ static const struct i2c_algorithm navi10_i2c_algo = {
 };
 
 static const struct i2c_adapter_quirks navi10_i2c_control_quirks = {
-   .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR,
+   .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN,
.max_read_len  = MAX_SW_I2C_COMMANDS,
.max_write_len = MAX_SW_I2C_COMMANDS,
.max_comb_1st_msg_len = 2,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 0c3407025eb29f..fb5b3ea6127342 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -3527,7 +3527,7 @@ static const struct i2c_algorithm sienna_cichlid_i2c_algo 
= {
 };
 
 static const struct i2c_adapter_quirks sienna_cichlid_i2c_control_quirks = {
-   .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR,
+   .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN,
.max_read_len  = MAX_SW_I2C_COMMANDS,
.max_write_len = MAX_SW_I2C_COMMANDS,
.max_comb_1st_msg_len = 2,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index c1c7aefa9d8fdc..c16ca0c78e9306 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1510,7 +1510,7 @@ static const struct i2c_algorithm aldebaran_i2c_algo = {
 };
 
 static const struct i2c_adapter_quirks aldebaran_i2c_control_quirks = {
-   .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR,
+   .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN,
.max_read_len  = MAX_SW_I2C_COMMANDS,
.max_write_len = MAX_SW_I2C_COMMANDS,
.max_comb_1st_msg_len = 2,
-- 
2.32.0


[PATCH] drm/amdgpu: Return error if no RAS

2021-07-02 Thread Luben Tuikov
In amdgpu_ras_query_error_count() return an error
if the device doesn't support RAS. This prevents
that function from having to always set the values
of the integer pointers (if set), and thus
prevents function side effects--always to have to
set values of integers if integer pointers set,
regardless of whether RAS is supported or
not--with this change this side effect is
mitigated.

Also, if no pointers are set, don't count, since
we've no way of reporting the counts.

Also, give this function a kernel-doc.

Cc: Alexander Deucher 
Cc: John Clements 
Cc: Hawking Zhang 
Reported-by: Tom Rix 
Fixes: a46751fbcde505 ("drm/amdgpu: Fix RAS function interface")
Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 49 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  6 +--
 2 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index c6ae63893dbdb2..ed698b2be79023 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -813,7 +813,7 @@ static int amdgpu_ras_enable_all_features(struct 
amdgpu_device *adev,
 
 /* query/inject/cure begin */
 int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
-   struct ras_query_if *info)
+ struct ras_query_if *info)
 {
struct ras_manager *obj = amdgpu_ras_find_obj(adev, >head);
struct ras_err_data err_data = {0, 0, 0, NULL};
@@ -1047,17 +1047,32 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
return ret;
 }
 
-/* get the total error counts on all IPs */
-void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
- unsigned long *ce_count,
- unsigned long *ue_count)
+/**
+ * amdgpu_ras_query_error_count -- Get error counts of all IPs
+ * adev: pointer to AMD GPU device
+ * ce_count: pointer to an integer to be set to the count of correctible 
errors.
+ * ue_count: pointer to an integer to be set to the count of uncorrectible
+ * errors.
+ *
+ * If set, @ce_count or @ue_count, count and return the corresponding
+ * error counts in those integer pointers. Return 0 if the device
+ * supports RAS. Return -EINVAL if the device doesn't support RAS.
+ */
+int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+unsigned long *ce_count,
+unsigned long *ue_count)
 {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
unsigned long ce, ue;
 
if (!adev->ras_enabled || !con)
-   return;
+   return -EINVAL;
+
+   /* Don't count since no reporting.
+*/
+   if (!ce_count && !ue_count)
+   return 0;
 
ce = 0;
ue = 0;
@@ -1065,9 +1080,11 @@ void amdgpu_ras_query_error_count(struct amdgpu_device 
*adev,
struct ras_query_if info = {
.head = obj->head,
};
+   int res;
 
-   if (amdgpu_ras_query_error_status(adev, ))
-   return;
+   res = amdgpu_ras_query_error_status(adev, );
+   if (res)
+   return res;
 
ce += info.ce_count;
ue += info.ue_count;
@@ -1078,6 +1095,8 @@ void amdgpu_ras_query_error_count(struct amdgpu_device 
*adev,
 
if (ue_count)
*ue_count = ue;
+
+   return 0;
 }
 /* query/inject/cure end */
 
@@ -2145,9 +2164,10 @@ static void amdgpu_ras_counte_dw(struct work_struct 
*work)
 
/* Cache new values.
 */
-   amdgpu_ras_query_error_count(adev, _count, _count);
-   atomic_set(>ras_ce_count, ce_count);
-   atomic_set(>ras_ue_count, ue_count);
+   if (amdgpu_ras_query_error_count(adev, _count, _count) == 0) {
+   atomic_set(>ras_ce_count, ce_count);
+   atomic_set(>ras_ue_count, ue_count);
+   }
 
pm_runtime_mark_last_busy(dev->dev);
 Out:
@@ -2320,9 +2340,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
 
/* Those are the cached values at init.
 */
-   amdgpu_ras_query_error_count(adev, _count, _count);
-   atomic_set(>ras_ce_count, ce_count);
-   atomic_set(>ras_ue_count, ue_count);
+   if (amdgpu_ras_query_error_count(adev, _count, _count) == 0) {
+   atomic_set(>ras_ce_count, ce_count);
+   atomic_set(>ras_ue_count, ue_count);
+   }
 
return 0;
 cleanup:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 283afd791db107..4d9c63f2f37718 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -491,9 +491,9 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device 
*adev,
 void amdgpu_ras_resume(struct amdgpu_device *adev);
 void 

Re: [PATCH] drm/amdgpu: initialize amdgpu_ras_query_error_count() error count parameters

2021-07-02 Thread Luben Tuikov
That's a good find, but I'd rather functions have no side effects. I'll follow 
up with a patch which correctly fixes this.

Regards,
Luben

On 2021-07-02 3:52 p.m., t...@redhat.com wrote:
> From: Tom Rix 
>
> Static analysis reports this problem
> amdgpu_ras.c:2324:2: warning: 2nd function call argument is an
>   uninitialized value
> atomic_set(>ras_ce_count, ce_count);
> ^~~~
>
> ce_count is normally set by the earlier call to
> amdgpu_ras_query_error_count().  But amdgpu_ras_query_error_count()
> can return early without setting, leaving its error count parameters
> in a garbage state.
>
> Initialize the error count parameters earlier.
>
> Fixes: a46751fbcde5 ("drm/amdgpu: Fix RAS function interface")
> Signed-off-by: Tom Rix 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 875874ea745ec..c80fa545aa2b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1056,6 +1056,12 @@ void amdgpu_ras_query_error_count(struct amdgpu_device 
> *adev,
>   struct ras_manager *obj;
>   unsigned long ce, ue;
>  
> + if (ce_count)
> + *ce_count = 0;
> +
> + if (ue_count)
> + *ue_count = 0;
> +
>   if (!adev->ras_enabled || !con)
>   return;
>  

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: use xarray for storing pasid in vm

2021-07-02 Thread Christian König

Am 02.07.21 um 11:25 schrieb Nirmoy Das:

Replace idr with xarray as we actually need hash functionality.
Cleanup code related to vm pasid by adding helper function.

Signed-off-by: Nirmoy Das 
Acked-by: Felix Kuehling 


Reviewed-by: Christian König  for the series.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 149 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   6 +-
  2 files changed, 73 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 63975bda8e76..0f82df5bfa7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,46 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
  };
  
+/**

+ * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm pointer
+ * @pasid: the pasid the VM is using on this GPU
+ *
+ * Set the pasid this VM is using on this GPU, can also be used to remove the
+ * pasid by passing in zero.
+ *
+ */
+int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+   u32 pasid)
+{
+   int r;
+
+   if (vm->pasid == pasid)
+   return 0;
+
+   if (vm->pasid) {
+   r = xa_err(xa_erase_irq(>vm_manager.pasids, vm->pasid));
+   if (r < 0)
+   return r;
+
+   vm->pasid = 0;
+   }
+
+   if (pasid) {
+   r = xa_err(xa_store_irq(>vm_manager.pasids, pasid, vm,
+   GFP_KERNEL));
+   if (r < 0)
+   return r;
+
+   vm->pasid = pasid;
+   }
+
+
+   return 0;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
   * happens while holding this lock anywhere to prevent deadlocks when
@@ -2940,18 +2980,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm, u32 pasid)
  
  	amdgpu_bo_unreserve(vm->root.bo);
  
-	if (pasid) {

-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
- GFP_ATOMIC);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-   if (r < 0)
-   goto error_free_root;
-
-   vm->pasid = pasid;
-   }
+   r = amdgpu_vm_set_pasid(adev, vm, pasid);
+   if (r)
+   goto error_free_root;
  
  	INIT_KFIFO(vm->faults);
  
@@ -3039,18 +3070,15 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,

if (r)
goto unreserve_bo;
  
-	if (pasid) {

-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
- GFP_ATOMIC);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+   /* Free the original amdgpu allocated pasid,
+* will be replaced with kfd allocated pasid.
+*/
+   if (vm->pasid)
+   amdgpu_pasid_free(vm->pasid);
  
-		if (r == -ENOSPC)

-   goto unreserve_bo;
-   r = 0;
-   }
+   r = amdgpu_vm_set_pasid(adev, vm, pasid);
+   if (r)
+   goto unreserve_bo;
  
  	/* Check if PD needs to be reinitialized and do it before

 * changing any other state, in case it fails.
@@ -3061,7 +3089,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
   to_amdgpu_bo_vm(vm->root.bo),
   false);
if (r)
-   goto free_idr;
+   goto free_pasid_entry;
}
  
  	/* Update VM state */

@@ -3078,7 +3106,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
r = amdgpu_bo_sync_wait(vm->root.bo,
AMDGPU_FENCE_OWNER_UNDEFINED, true);
if (r)
-   goto free_idr;
+   goto free_pasid_entry;
  
  		vm->update_funcs = _vm_cpu_funcs;

} else {
@@ -3088,36 +3116,13 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
vm->last_update = NULL;
vm->is_compute_context = true;
  
-	if (vm->pasid) {

-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   idr_remove(>vm_manager.pasid_idr, vm->pasid);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
-   /* Free the original amdgpu allocated pasid
-* Will be replaced with kfd allocated pasid
-*/
-   amdgpu_pasid_free(vm->pasid);
-   vm->pasid = 0;
-   

[PATCH 2/2] drm/amdgpu: separate out vm pasid assignment

2021-07-02 Thread Nirmoy Das
Use new helper function amdgpu_vm_set_pasid() to
assign vm pasid value. This also ensures that we don't free
a pasid from vm code as pasids are allocated somewhere else.

Signed-off-by: Nirmoy Das 
Acked-by: Felix Kuehling 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 13 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   | 10 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 28 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  4 +--
 4 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f96598279593..3a2ac7f66bbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1287,11 +1287,22 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct 
kgd_dev *kgd,
if (avm->process_info)
return -EINVAL;
 
+   /* Free the original amdgpu allocated pasid,
+* will be replaced with kfd allocated pasid.
+*/
+   if (avm->pasid) {
+   amdgpu_pasid_free(avm->pasid);
+   amdgpu_vm_set_pasid(adev, avm, 0);
+   }
+
/* Convert VM into a compute VM */
-   ret = amdgpu_vm_make_compute(adev, avm, pasid);
+   ret = amdgpu_vm_make_compute(adev, avm);
if (ret)
return ret;
 
+   ret = amdgpu_vm_set_pasid(adev, avm, pasid);
+   if (ret)
+   return ret;
/* Initialize KFD part of the VM and process info */
ret = init_kfd_vm(avm, process_info, ef);
if (ret)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index cbb932f97355..66bf8b0c56bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1178,10 +1178,14 @@ int amdgpu_driver_open_kms(struct drm_device *dev, 
struct drm_file *file_priv)
pasid = 0;
}
 
-   r = amdgpu_vm_init(adev, >vm, pasid);
+   r = amdgpu_vm_init(adev, >vm);
if (r)
goto error_pasid;
 
+   r = amdgpu_vm_set_pasid(adev, >vm, pasid);
+   if (r)
+   goto error_vm;
+
fpriv->prt_va = amdgpu_vm_bo_add(adev, >vm, NULL);
if (!fpriv->prt_va) {
r = -ENOMEM;
@@ -1209,8 +1213,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, 
struct drm_file *file_priv)
amdgpu_vm_fini(adev, >vm);
 
 error_pasid:
-   if (pasid)
+   if (pasid) {
amdgpu_pasid_free(pasid);
+   amdgpu_vm_set_pasid(adev, >vm, 0);
+   }
 
kfree(fpriv);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0f82df5bfa7a..565c8c59c995 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2899,14 +2899,13 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long 
timeout)
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
- * @pasid: Process address space identifier
  *
  * Init @vm fields.
  *
  * Returns:
  * 0 for success, error for failure.
  */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
@@ -2980,10 +2979,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm, u32 pasid)
 
amdgpu_bo_unreserve(vm->root.bo);
 
-   r = amdgpu_vm_set_pasid(adev, vm, pasid);
-   if (r)
-   goto error_free_root;
-
INIT_KFIFO(vm->faults);
 
return 0;
@@ -3039,7 +3034,6 @@ static int amdgpu_vm_check_clean_reserved(struct 
amdgpu_device *adev,
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
- * @pasid: pasid to use
  *
  * This only works on GFX VMs that don't have any BOs added and no
  * page tables allocated yet.
@@ -3047,7 +3041,6 @@ static int amdgpu_vm_check_clean_reserved(struct 
amdgpu_device *adev,
  * Changes the following VM parameters:
  * - use_cpu_for_update
  * - pte_supports_ats
- * - pasid (old PASID is released, because compute manages its own PASIDs)
  *
  * Reinitializes the page directory to reflect the changed ATS
  * setting.
@@ -3055,8 +3048,7 @@ static int amdgpu_vm_check_clean_reserved(struct 
amdgpu_device *adev,
  * Returns:
  * 0 for success, -errno for errors.
  */
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-  u32 pasid)
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
int r;
@@ -3070,16 +3062,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
if (r)
goto unreserve_bo;
 
-   /* Free the original amdgpu allocated pasid,
-* will be replaced with kfd allocated 

[PATCH 1/2] drm/amdgpu: use xarray for storing pasid in vm

2021-07-02 Thread Nirmoy Das
Replace idr with xarray as we actually need hash functionality.
Cleanup code related to vm pasid by adding helper function.

Signed-off-by: Nirmoy Das 
Acked-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 149 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   6 +-
 2 files changed, 73 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 63975bda8e76..0f82df5bfa7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,46 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
 };
 
+/**
+ * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm pointer
+ * @pasid: the pasid the VM is using on this GPU
+ *
+ * Set the pasid this VM is using on this GPU, can also be used to remove the
+ * pasid by passing in zero.
+ *
+ */
+int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+   u32 pasid)
+{
+   int r;
+
+   if (vm->pasid == pasid)
+   return 0;
+
+   if (vm->pasid) {
+   r = xa_err(xa_erase_irq(>vm_manager.pasids, vm->pasid));
+   if (r < 0)
+   return r;
+
+   vm->pasid = 0;
+   }
+
+   if (pasid) {
+   r = xa_err(xa_store_irq(>vm_manager.pasids, pasid, vm,
+   GFP_KERNEL));
+   if (r < 0)
+   return r;
+
+   vm->pasid = pasid;
+   }
+
+
+   return 0;
+}
+
 /*
  * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
  * happens while holding this lock anywhere to prevent deadlocks when
@@ -2940,18 +2980,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm, u32 pasid)
 
amdgpu_bo_unreserve(vm->root.bo);
 
-   if (pasid) {
-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
- GFP_ATOMIC);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-   if (r < 0)
-   goto error_free_root;
-
-   vm->pasid = pasid;
-   }
+   r = amdgpu_vm_set_pasid(adev, vm, pasid);
+   if (r)
+   goto error_free_root;
 
INIT_KFIFO(vm->faults);
 
@@ -3039,18 +3070,15 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
if (r)
goto unreserve_bo;
 
-   if (pasid) {
-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   r = idr_alloc(>vm_manager.pasid_idr, vm, pasid, pasid + 1,
- GFP_ATOMIC);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
+   /* Free the original amdgpu allocated pasid,
+* will be replaced with kfd allocated pasid.
+*/
+   if (vm->pasid)
+   amdgpu_pasid_free(vm->pasid);
 
-   if (r == -ENOSPC)
-   goto unreserve_bo;
-   r = 0;
-   }
+   r = amdgpu_vm_set_pasid(adev, vm, pasid);
+   if (r)
+   goto unreserve_bo;
 
/* Check if PD needs to be reinitialized and do it before
 * changing any other state, in case it fails.
@@ -3061,7 +3089,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
   to_amdgpu_bo_vm(vm->root.bo),
   false);
if (r)
-   goto free_idr;
+   goto free_pasid_entry;
}
 
/* Update VM state */
@@ -3078,7 +3106,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
r = amdgpu_bo_sync_wait(vm->root.bo,
AMDGPU_FENCE_OWNER_UNDEFINED, true);
if (r)
-   goto free_idr;
+   goto free_pasid_entry;
 
vm->update_funcs = _vm_cpu_funcs;
} else {
@@ -3088,36 +3116,13 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
vm->last_update = NULL;
vm->is_compute_context = true;
 
-   if (vm->pasid) {
-   unsigned long flags;
-
-   spin_lock_irqsave(>vm_manager.pasid_lock, flags);
-   idr_remove(>vm_manager.pasid_idr, vm->pasid);
-   spin_unlock_irqrestore(>vm_manager.pasid_lock, flags);
-
-   /* Free the original amdgpu allocated pasid
-* Will be replaced with kfd allocated pasid
-*/
-   amdgpu_pasid_free(vm->pasid);
-   vm->pasid = 0;
-   }
-
/* Free the shadow bo for compute VM */
 

Re: [PATCH] drm/amdgpu: Correct the irq numbers for virtual ctrc

2021-07-02 Thread Das, Nirmoy

Please describe bit more with a commit message.

On 7/1/2021 6:37 AM, Emily Deng wrote:

Signed-off-by: Emily Deng 
Signed-off-by: Victor 
---
  drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c 
b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index 33324427b555..7e0d8c092c7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -766,7 +766,7 @@ static const struct amdgpu_irq_src_funcs 
dce_virtual_crtc_irq_funcs = {
  
  static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev)

  {
-   adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VBLANK6 + 1;
+   adev->crtc_irq.num_types = adev->mode_info.num_crtc;
adev->crtc_irq.funcs = _virtual_crtc_irq_funcs;
  }
  

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v4] drm/amdgpu: Restore msix after FLR

2021-07-02 Thread Peng Ju Zhou
From: "Emily.Deng" 

After FLR, the msix will be cleared, so need to re-enable it.

Signed-off-by: Emily.Deng 
Signed-off-by: Peng Ju Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 90f50561b43a..034420c38352 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -277,6 +277,19 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
return true;
 }
 
+void amdgpu_restore_msix(struct amdgpu_device *adev)
+{
+   u16 ctrl;
+
+   pci_read_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, 
);
+   if (!(ctrl & PCI_MSIX_FLAGS_ENABLE))
+   return;
+
+   ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
+   pci_write_config_word(adev->pdev, adev->pdev->msix_cap + 
PCI_MSIX_FLAGS, ctrl);
+   ctrl |= PCI_MSIX_FLAGS_ENABLE;
+   pci_write_config_word(adev->pdev, adev->pdev->msix_cap + 
PCI_MSIX_FLAGS, ctrl);
+}
 /**
  * amdgpu_irq_init - initialize interrupt handling
  *
@@ -558,6 +571,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct 
amdgpu_device *adev)
 {
int i, j, k;
 
+   amdgpu_restore_msix(adev);
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v4] drm/amdgpu: Restore msix after FLR

2021-07-02 Thread Christian König



Am 02.07.21 um 05:23 schrieb Peng Ju Zhou:

From: "Emily.Deng" 

After FLR, the msix will be cleared, so need to re-enable it.

Signed-off-by: Emily.Deng 
Signed-off-by: Peng Ju Zhou 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 90f50561b43a..034420c38352 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -277,6 +277,19 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
return true;
  }
  
+void amdgpu_restore_msix(struct amdgpu_device *adev)


I think this function should be static and maybe add a one line comment 
like "Clear and re-set the MSIX flags if they where set before to 
trigger re-enabling it".


With that done feel free to add an Acked-by: Christian König 
, but I would also wait what Alex has to say 
about that.


Regards,
Christian


+{
+   u16 ctrl;
+
+   pci_read_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, 
);
+   if (!(ctrl & PCI_MSIX_FLAGS_ENABLE))
+   return;
+
+   ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
+   pci_write_config_word(adev->pdev, adev->pdev->msix_cap + 
PCI_MSIX_FLAGS, ctrl);
+   ctrl |= PCI_MSIX_FLAGS_ENABLE;
+   pci_write_config_word(adev->pdev, adev->pdev->msix_cap + 
PCI_MSIX_FLAGS, ctrl);
+}
  /**
   * amdgpu_irq_init - initialize interrupt handling
   *
@@ -558,6 +571,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct 
amdgpu_device *adev)
  {
int i, j, k;
  
+	amdgpu_restore_msix(adev);

for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx