Re: [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb

2023-09-19 Thread Alex Deucher
On Tue, Sep 19, 2023 at 8:08 AM Christian König
 wrote:
>
> The KIQ code path was ignoring the second flush. Also avoid long lines and
> re-calculating the register offsets over and over again.
>
> Signed-off-by: Christian König 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +--
>  1 file changed, 18 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 2936a0fb7527..0f82ab48887a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -816,13 +816,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
> uint32_t vmhub, uint32_t flush_type)
>  {
> bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
> +   u32 j, inv_req, inv_req2, tmp, sem, req, ack;
> const unsigned int eng = 17;
> -   u32 j, inv_req, inv_req2, tmp;
> struct amdgpu_vmhub *hub;
>
> BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
>
> hub = >vmhub[vmhub];
> +   sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
> +   req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> +   ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> +
> if (adev->gmc.xgmi.num_physical_nodes &&
> amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0)) {
> /* Vega20+XGMI caches PTEs in TC and TLB. Add a
> @@ -854,6 +858,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
>
> amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>1 << vmid);
> +   if (inv_req2)
> +   amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
> +  inv_req2, 1 << 
> vmid);
> +
> up_read(>reset_domain->sem);
> return;
> }
> @@ -872,9 +880,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
> for (j = 0; j < adev->usec_timeout; j++) {
> /* a read return value of 1 means semaphore acquire */
> if (vmhub >= AMDGPU_MMHUB0(0))
> -   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
> hub->vm_inv_eng0_sem + hub->eng_distance * eng);
> +   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
> else
> -   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
> hub->vm_inv_eng0_sem + hub->eng_distance * eng);
> +   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
> if (tmp & 0x1)
> break;
> udelay(1);
> @@ -886,9 +894,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
>
> do {
> if (vmhub >= AMDGPU_MMHUB0(0))
> -   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
> hub->eng_distance * eng, inv_req);
> +   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
> else
> -   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
> hub->eng_distance * eng, inv_req);
> +   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
>
> /*
>  * Issue a dummy read to wait for the ACK register to
> @@ -897,14 +905,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
>  */
> if ((vmhub == AMDGPU_GFXHUB(0)) &&
> (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 
> 2)))
> -   RREG32_NO_KIQ(hub->vm_inv_eng0_req +
> - hub->eng_distance * eng);
> +   RREG32_SOC15_IP_NO_KIQ(GC, req);
>
> for (j = 0; j < adev->usec_timeout; j++) {
> if (vmhub >= AMDGPU_MMHUB0(0))
> -   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
> hub->vm_inv_eng0_ack + hub->eng_distance * eng);
> +   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
> else
> -   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
> hub->vm_inv_eng0_ack + hub->eng_distance * eng);
> +   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
> if (tmp & (1 << vmid))
> break;
> udelay(1);
> @@ -921,9 +928,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
>  * write with 0 means semaphore release
>  */
> if (vmhub >= AMDGPU_MMHUB0(0))
> -   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + 

[PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb

2023-09-19 Thread Christian König
The KIQ code path was ignoring the second flush. Also avoid long lines and
re-calculating the register offsets over and over again.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +--
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2936a0fb7527..0f82ab48887a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -816,13 +816,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
 {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
+   u32 j, inv_req, inv_req2, tmp, sem, req, ack;
const unsigned int eng = 17;
-   u32 j, inv_req, inv_req2, tmp;
struct amdgpu_vmhub *hub;
 
BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
 
hub = >vmhub[vmhub];
+   sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+   req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+   ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
if (adev->gmc.xgmi.num_physical_nodes &&
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0)) {
/* Vega20+XGMI caches PTEs in TC and TLB. Add a
@@ -854,6 +858,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
   1 << vmid);
+   if (inv_req2)
+   amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
+  inv_req2, 1 << vmid);
+
up_read(>reset_domain->sem);
return;
}
@@ -872,9 +880,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acquire */
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
if (tmp & 0x1)
break;
udelay(1);
@@ -886,9 +894,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
do {
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
else
-   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
 
/*
 * Issue a dummy read to wait for the ACK register to
@@ -897,14 +905,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 */
if ((vmhub == AMDGPU_GFXHUB(0)) &&
(amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
-   RREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng);
+   RREG32_SOC15_IP_NO_KIQ(GC, req);
 
for (j = 0; j < adev->usec_timeout; j++) {
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
if (tmp & (1 << vmid))
break;
udelay(1);
@@ -921,9 +928,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 * write with 0 means semaphore release
 */
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + 
hub->eng_distance * eng, 0);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
else
-   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + 
hub->eng_distance * eng, 0);
+   WREG32_SOC15_IP_NO_KIQ(GC, sem, 0);
 

Re: [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb

2023-09-19 Thread Christian König

Am 08.09.23 um 20:58 schrieb Felix Kuehling:


On 2023-09-05 02:04, Christian König wrote:
The KIQ code path was ignoring the second flush. Also avoid long 
lines and

re-calculating the register offsets over and over again.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +--
  1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 0673cda547bb..4f6990ba71cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -814,13 +814,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
amdgpu_device *adev, uint32_t vmid,

  uint32_t vmhub, uint32_t flush_type)
  {
  bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, 
vmhub);

+    u32 j, inv_req, inv_req2, tmp, sem, req, ack;
  const unsigned int eng = 17;
-    u32 j, inv_req, inv_req2, tmp;
  struct amdgpu_vmhub *hub;
    BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
    hub = >vmhub[vmhub];
+    sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+    req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+    ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;


If you use SOC15_REG_OFFSET here, you can drop all the if (vmhub >= 
AMDGPU_MMHUB0(0)) conditions below.


I though about that as well, but that won't work since we don't know the 
register name.


Regards,
Christian.



Other than that, the patch looks good to me.

Regards,
  Felix



+
  if (adev->gmc.xgmi.num_physical_nodes &&
  adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
  /* Vega20+XGMI caches PTEs in TC and TLB. Add a
@@ -852,6 +856,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
amdgpu_device *adev, uint32_t vmid,

    amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
 1 << vmid);
+    if (inv_req2)
+    amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
+   inv_req2, 1 << vmid);
+
  up_read(>reset_domain->sem);
  return;
  }
@@ -870,9 +878,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
amdgpu_device *adev, uint32_t vmid,

  for (j = 0; j < adev->usec_timeout; j++) {
  /* a read return value of 1 means semaphore acquire */
  if (vmhub >= AMDGPU_MMHUB0(0))
-    tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);

+    tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
  else
-    tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);

+    tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
  if (tmp & 0x1)
  break;
  udelay(1);
@@ -884,9 +892,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
amdgpu_device *adev, uint32_t vmid,

    do {
  if (vmhub >= AMDGPU_MMHUB0(0))
-    WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);

+    WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
  else
-    WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);

+    WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
    /*
   * Issue a dummy read to wait for the ACK register to
@@ -895,14 +903,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
amdgpu_device *adev, uint32_t vmid,

   */
  if ((vmhub == AMDGPU_GFXHUB(0)) &&
  (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
-    RREG32_NO_KIQ(hub->vm_inv_eng0_req +
-  hub->eng_distance * eng);
+    RREG32_NO_KIQ(req);
    for (j = 0; j < adev->usec_timeout; j++) {
  if (vmhub >= AMDGPU_MMHUB0(0))
-    tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);

+    tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
  else
-    tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);

+    tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
  if (tmp & (1 << vmid))
  break;
  udelay(1);
@@ -919,9 +926,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct 
amdgpu_device *adev, uint32_t vmid,

   * write with 0 means semaphore release
   */
  if (vmhub >= AMDGPU_MMHUB0(0))
-    WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + 
hub->eng_distance * eng, 0);

+    WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
  else
-    WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + 
hub->eng_distance * eng, 0);

+    WREG32_SOC15_IP_NO_KIQ(GC, sem, 0);
  }
    spin_unlock(>gmc.invalidate_lock);




Re: [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb

2023-09-08 Thread Felix Kuehling



On 2023-09-05 02:04, Christian König wrote:

The KIQ code path was ignoring the second flush. Also avoid long lines and
re-calculating the register offsets over and over again.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +--
  1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 0673cda547bb..4f6990ba71cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -814,13 +814,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
  {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
+   u32 j, inv_req, inv_req2, tmp, sem, req, ack;
const unsigned int eng = 17;
-   u32 j, inv_req, inv_req2, tmp;
struct amdgpu_vmhub *hub;
  
  	BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
  
  	hub = >vmhub[vmhub];

+   sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+   req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+   ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;


If you use SOC15_REG_OFFSET here, you can drop all the if (vmhub >= 
AMDGPU_MMHUB0(0)) conditions below.


Other than that, the patch looks good to me.

Regards,
  Felix



+
if (adev->gmc.xgmi.num_physical_nodes &&
adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
/* Vega20+XGMI caches PTEs in TC and TLB. Add a
@@ -852,6 +856,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
  
  		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,

   1 << vmid);
+   if (inv_req2)
+   amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
+  inv_req2, 1 << vmid);
+
up_read(>reset_domain->sem);
return;
}
@@ -870,9 +878,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acquire */
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
if (tmp & 0x1)
break;
udelay(1);
@@ -884,9 +892,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
  
  	do {

if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
else
-   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
  
  		/*

 * Issue a dummy read to wait for the ACK register to
@@ -895,14 +903,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 */
if ((vmhub == AMDGPU_GFXHUB(0)) &&
(adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
-   RREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng);
+   RREG32_NO_KIQ(req);
  
  		for (j = 0; j < adev->usec_timeout; j++) {

if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
if (tmp & (1 << vmid))
break;
udelay(1);
@@ -919,9 +926,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 * write with 0 means semaphore release
 */
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + 
hub->eng_distance * eng, 0);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
else
-   

Re: [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb

2023-09-06 Thread Christian König

Am 05.09.23 um 22:45 schrieb Alex Deucher:

On Tue, Sep 5, 2023 at 3:00 AM Christian König
 wrote:

The KIQ code path was ignoring the second flush. Also avoid long lines and
re-calculating the register offsets over and over again.

I'd split this into two patches, one for the code cleanup and one to
fix the missing flush.


I've later opted for moving the whole workarounds a layer up because we 
seem to have missed this in a couple of more places.


So I should probably just completely drop fixing this here.

Christian.



Alex


Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +--
  1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 0673cda547bb..4f6990ba71cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -814,13 +814,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 uint32_t vmhub, uint32_t flush_type)
  {
 bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
+   u32 j, inv_req, inv_req2, tmp, sem, req, ack;
 const unsigned int eng = 17;
-   u32 j, inv_req, inv_req2, tmp;
 struct amdgpu_vmhub *hub;

 BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);

 hub = >vmhub[vmhub];
+   sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+   req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+   ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
 if (adev->gmc.xgmi.num_physical_nodes &&
 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
 /* Vega20+XGMI caches PTEs in TC and TLB. Add a
@@ -852,6 +856,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,

 amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid);
+   if (inv_req2)
+   amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
+  inv_req2, 1 << vmid);
+
 up_read(>reset_domain->sem);
 return;
 }
@@ -870,9 +878,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 for (j = 0; j < adev->usec_timeout; j++) {
 /* a read return value of 1 means semaphore acquire */
 if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
 else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
 if (tmp & 0x1)
 break;
 udelay(1);
@@ -884,9 +892,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,

 do {
 if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
 else
-   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);

 /*
  * Issue a dummy read to wait for the ACK register to
@@ -895,14 +903,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
  */
 if ((vmhub == AMDGPU_GFXHUB(0)) &&
 (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
-   RREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng);
+   RREG32_NO_KIQ(req);

 for (j = 0; j < adev->usec_timeout; j++) {
 if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
 else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
 if (tmp & (1 << vmid))
 break;
 udelay(1);
@@ -919,9 +926,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
  * write with 0 means semaphore release
  

Re: [PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb

2023-09-05 Thread Alex Deucher
On Tue, Sep 5, 2023 at 3:00 AM Christian König
 wrote:
>
> The KIQ code path was ignoring the second flush. Also avoid long lines and
> re-calculating the register offsets over and over again.

I'd split this into two patches, one for the code cleanup and one to
fix the missing flush.

Alex

>
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +--
>  1 file changed, 18 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 0673cda547bb..4f6990ba71cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -814,13 +814,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
> uint32_t vmhub, uint32_t flush_type)
>  {
> bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
> +   u32 j, inv_req, inv_req2, tmp, sem, req, ack;
> const unsigned int eng = 17;
> -   u32 j, inv_req, inv_req2, tmp;
> struct amdgpu_vmhub *hub;
>
> BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
>
> hub = >vmhub[vmhub];
> +   sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
> +   req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> +   ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> +
> if (adev->gmc.xgmi.num_physical_nodes &&
> adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
> /* Vega20+XGMI caches PTEs in TC and TLB. Add a
> @@ -852,6 +856,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
>
> amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>1 << vmid);
> +   if (inv_req2)
> +   amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
> +  inv_req2, 1 << 
> vmid);
> +
> up_read(>reset_domain->sem);
> return;
> }
> @@ -870,9 +878,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
> for (j = 0; j < adev->usec_timeout; j++) {
> /* a read return value of 1 means semaphore acquire */
> if (vmhub >= AMDGPU_MMHUB0(0))
> -   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
> hub->vm_inv_eng0_sem + hub->eng_distance * eng);
> +   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
> else
> -   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
> hub->vm_inv_eng0_sem + hub->eng_distance * eng);
> +   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
> if (tmp & 0x1)
> break;
> udelay(1);
> @@ -884,9 +892,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
>
> do {
> if (vmhub >= AMDGPU_MMHUB0(0))
> -   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
> hub->eng_distance * eng, inv_req);
> +   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
> else
> -   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
> hub->eng_distance * eng, inv_req);
> +   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
>
> /*
>  * Issue a dummy read to wait for the ACK register to
> @@ -895,14 +903,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
>  */
> if ((vmhub == AMDGPU_GFXHUB(0)) &&
> (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
> -   RREG32_NO_KIQ(hub->vm_inv_eng0_req +
> - hub->eng_distance * eng);
> +   RREG32_NO_KIQ(req);
>
> for (j = 0; j < adev->usec_timeout; j++) {
> if (vmhub >= AMDGPU_MMHUB0(0))
> -   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
> hub->vm_inv_eng0_ack + hub->eng_distance * eng);
> +   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
> else
> -   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
> hub->vm_inv_eng0_ack + hub->eng_distance * eng);
> +   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
> if (tmp & (1 << vmid))
> break;
> udelay(1);
> @@ -919,9 +926,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
>  * write with 0 means semaphore release
>  */
> if (vmhub >= AMDGPU_MMHUB0(0))
> -   

[PATCH 01/11] drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb

2023-09-05 Thread Christian König
The KIQ code path was ignoring the second flush. Also avoid long lines and
re-calculating the register offsets over and over again.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 29 +--
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 0673cda547bb..4f6990ba71cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -814,13 +814,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
 {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
+   u32 j, inv_req, inv_req2, tmp, sem, req, ack;
const unsigned int eng = 17;
-   u32 j, inv_req, inv_req2, tmp;
struct amdgpu_vmhub *hub;
 
BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
 
hub = >vmhub[vmhub];
+   sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+   req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+   ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
if (adev->gmc.xgmi.num_physical_nodes &&
adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
/* Vega20+XGMI caches PTEs in TC and TLB. Add a
@@ -852,6 +856,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
   1 << vmid);
+   if (inv_req2)
+   amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack,
+  inv_req2, 1 << vmid);
+
up_read(>reset_domain->sem);
return;
}
@@ -870,9 +878,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acquire */
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
if (tmp & 0x1)
break;
udelay(1);
@@ -884,9 +892,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
do {
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
else
-   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
 
/*
 * Issue a dummy read to wait for the ACK register to
@@ -895,14 +903,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 */
if ((vmhub == AMDGPU_GFXHUB(0)) &&
(adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
-   RREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng);
+   RREG32_NO_KIQ(req);
 
for (j = 0; j < adev->usec_timeout; j++) {
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
if (tmp & (1 << vmid))
break;
udelay(1);
@@ -919,9 +926,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 * write with 0 means semaphore release
 */
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + 
hub->eng_distance * eng, 0);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
else
-   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + 
hub->eng_distance * eng, 0);
+   WREG32_SOC15_IP_NO_KIQ(GC, sem, 0);
}