[PATCH] drm/amdgpu: Do not skip gfx9.4.3 and sdma4.4.2 fini under SRIOV

2024-03-11 Thread Victor Lu
GFX and SDMA is not properly deinitialized under SRIOV.

Remove amdgpu_sriov_vf() guards to allow VF to deinitialize correctly.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c  | 7 +--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 9 -
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index b53c8fd4e8cf..fbc34f1797a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -2049,7 +2049,6 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device 
*adev, int xcc_id)
 * hypervisor side.
 */
WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), 
CP_PQ_WPTR_POLL_CNTL, EN, 0);
-   return;
}
 
/* Use deinitialize sequence from CAIL when unbinding device
@@ -2068,7 +2067,11 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device 
*adev, int xcc_id)
}
 
gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id);
-   gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id);
+
+   /* SRIOV world switch will fail if MEC is disabled */
+   if (!amdgpu_sriov_vf(adev)) {
+   gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id);
+   }
 }
 
 static int gfx_v9_4_3_hw_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index eaa4f5f49949..fa0752d81c23 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1441,17 +1441,16 @@ static int sdma_v4_4_2_hw_fini(void *handle)
uint32_t inst_mask;
int i;
 
-   if (amdgpu_sriov_vf(adev))
-   return 0;
-
-   inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
-   if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+   /* SRIOV does not init RAS features */
+   if ((!amdgpu_sriov_vf(adev)) &&
+amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
amdgpu_irq_put(adev, >sdma.ecc_irq,
   AMDGPU_SDMA_IRQ_INSTANCE0 + i);
}
}
 
+   inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
sdma_v4_4_2_inst_enable(adev, false, inst_mask);
 
-- 
2.34.1



[PATCH] drm/amdgpu: Do not program IH_CHICKEN in vega20_ih.c under SRIOV

2024-02-13 Thread Victor Lu
IH_CHICKEN is blocked for VF writes; this access should be skipped.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 38 ++
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index db66e6cccaf2..b9e785846637 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -291,27 +291,29 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
 
adev->nbio.funcs->ih_control(adev);
 
-   if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 2, 1)) &&
-   adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
-   ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
-   if (adev->irq.ih.use_bus_addr) {
-   ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
-  MC_SPACE_GPA_ENABLE, 1);
+   if (!amdgpu_sriov_vf(adev)) {
+   if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 
2, 1)) &&
+   adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+   ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+   if (adev->irq.ih.use_bus_addr) {
+   ih_chicken = REG_SET_FIELD(ih_chicken, 
IH_CHICKEN,
+  MC_SPACE_GPA_ENABLE, 
1);
+   }
+   WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
}
-   WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
-   }
 
-   /* psp firmware won't program IH_CHICKEN for aldebaran
-* driver needs to program it properly according to
-* MC_SPACE type in IH_RB_CNTL */
-   if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0)) ||
-   (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2))) {
-   ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN);
-   if (adev->irq.ih.use_bus_addr) {
-   ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
-  MC_SPACE_GPA_ENABLE, 1);
+   /* psp firmware won't program IH_CHICKEN for aldebaran
+* driver needs to program it properly according to
+* MC_SPACE type in IH_RB_CNTL */
+   if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 
4, 0)) ||
+   (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 
4, 2))) {
+   ih_chicken = RREG32_SOC15(OSSSYS, 0, 
mmIH_CHICKEN_ALDEBARAN);
+   if (adev->irq.ih.use_bus_addr) {
+   ih_chicken = REG_SET_FIELD(ih_chicken, 
IH_CHICKEN,
+  MC_SPACE_GPA_ENABLE, 
1);
+   }
+   WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, 
ih_chicken);
}
-   WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken);
}
 
for (i = 0; i < ARRAY_SIZE(ih); i++) {
-- 
2.34.1



[PATCH] drm/amdgpu: Improve error checking in amdgpu_virt_rlcg_reg_rw (v2)

2024-02-13 Thread Victor Lu
The current error detection only looks for a timeout.
This should be changed to also check scratch_reg1 for any errors
returned from RLCG.

v2: remove new error value

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 6ff7d3fb2008..7a4eae36778a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -979,7 +979,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 
offset, u32 v, u32 f
 * SCRATCH_REG0 = read/write value
 * SCRATCH_REG1[30:28]  = command
 * SCRATCH_REG1[19:0]   = address in dword
-* SCRATCH_REG1[26:24]  = Error reporting
+* SCRATCH_REG1[27:24]  = Error reporting
 */
writel(v, scratch_reg0);
writel((offset | flag), scratch_reg1);
@@ -993,7 +993,8 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 
offset, u32 v, u32 f
udelay(10);
}
 
-   if (i >= timeout) {
+   tmp = readl(scratch_reg1);
+   if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 
0) {
if (amdgpu_sriov_rlcg_error_report_enabled(adev)) {
if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) {
dev_err(adev->dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index fa7be5f277b9..3f59b7b5523f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -45,6 +45,7 @@
 #define AMDGPU_RLCG_REG_NOT_IN_RANGE   0x100
 
 #define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK  0xF
+#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK0xF00
 
 /* all asic after AI use this offset */
 #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
-- 
2.34.1



[PATCH 4/4] drm/amdgpu: Do not program VM_L2_CNTL under SRIOV

2024-01-02 Thread Victor Lu
VM_L2_CNTL* should not be programmed on driver unload under SRIOV.
These regs are skipped during SRIOV driver init.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 55423ff1bb49..20e800bc0b68 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -454,10 +454,12 @@ static void gfxhub_v1_2_xcc_gart_disable(struct 
amdgpu_device *adev,
WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, 
tmp);
 
/* Setup L2 cache */
-   tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
-   tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
-   WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
-   WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+   if (!amdgpu_sriov_vf(adev)) {
+   tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
+   tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 
0);
+   WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
+   WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+   }
}
 }
 
-- 
2.34.1



[PATCH 3/4] drm/amdgpu: Use correct SRIOV macro for gmc_v9_0_vm_fault_interrupt_state

2024-01-02 Thread Victor Lu
Under SRIOV, programming to VM_CONTEXT*_CNTL regs failed because the
current macro does not pass through the correct xcc instance.
Use the *REG32_XCC macro in this case.

The behaviour without SRIOV is the same.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 473a774294ce..e2e14d40109c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -496,14 +496,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
if (j >= AMDGPU_MMHUB0(0))
tmp = RREG32_SOC15_IP(MMHUB, reg);
else
-   tmp = RREG32_SOC15_IP(GC, reg);
+   tmp = RREG32_XCC(reg, j);
 
tmp &= ~bits;
 
if (j >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP(MMHUB, reg, tmp);
else
-   WREG32_SOC15_IP(GC, reg, tmp);
+   WREG32_XCC(reg, tmp, j);
}
}
break;
@@ -524,14 +524,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
if (j >= AMDGPU_MMHUB0(0))
tmp = RREG32_SOC15_IP(MMHUB, reg);
else
-   tmp = RREG32_SOC15_IP(GC, reg);
+   tmp = RREG32_XCC(reg, j);
 
tmp |= bits;
 
if (j >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP(MMHUB, reg, tmp);
else
-   WREG32_SOC15_IP(GC, reg, tmp);
+   WREG32_XCC(reg, tmp, j);
}
}
break;
-- 
2.34.1



[PATCH 2/4] drm/amdgpu: Do not program SQ_TIMEOUT_CONFIG in SRIOV

2024-01-02 Thread Victor Lu
VF should not program this register.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 00b21ece081f..30cc155f20d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -3888,6 +3888,9 @@ static void gfx_v9_4_3_inst_enable_watchdog_timer(struct 
amdgpu_device *adev,
uint32_t i;
uint32_t data;
 
+   if (amdgpu_sriov_vf(adev))
+   return;
+
data = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_TIMEOUT_CONFIG);
data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
 amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 
0);
-- 
2.34.1



[PATCH 1/4] drm/amdgpu: Improve error checking in amdgpu_virt_rlcg_reg_rw

2024-01-02 Thread Victor Lu
The current error detection only looks for a timeout.
This should be changed to also check scratch_reg1 for any errors
returned from RLCG.

Also add a new error value.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 0dcff2889e25..3cd085569515 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1022,7 +1022,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
 * SCRATCH_REG0 = read/write value
 * SCRATCH_REG1[30:28]  = command
 * SCRATCH_REG1[19:0]   = address in dword
-* SCRATCH_REG1[26:24]  = Error reporting
+* SCRATCH_REG1[27:24]  = Error reporting
 */
writel(v, scratch_reg0);
writel((offset | flag), scratch_reg1);
@@ -1036,7 +1036,8 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
udelay(10);
}
 
-   if (i >= timeout) {
+   tmp = readl(scratch_reg1);
+   if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 
0) {
if (amdgpu_sriov_rlcg_error_report_enabled(adev)) {
if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) {
dev_err(adev->dev,
@@ -1047,6 +1048,9 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
} else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) {
dev_err(adev->dev,
"register is not in range, rlcg 
failed to program reg: 0x%05x\n", offset);
+   } else if (tmp & 
AMDGPU_RLCG_INVALID_XCD_ACCESS) {
+   dev_err(adev->dev,
+   "invalid xcd access, rlcg 
failed to program reg: 0x%05x\n", offset);
} else {
dev_err(adev->dev,
"unknown error type, rlcg 
failed to program reg: 0x%05x\n", offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index d4207e44141f..447af2e4aef0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -40,11 +40,13 @@
 #define AMDGPU_RLCG_MMHUB_WRITE(0x2 << 28)
 
 /* error code for indirect register access path supported by rlcg for sriov */
+#define AMDGPU_RLCG_INVALID_XCD_ACCESS 0x800
 #define AMDGPU_RLCG_VFGATE_DISABLED0x400
 #define AMDGPU_RLCG_WRONG_OPERATION_TYPE   0x200
 #define AMDGPU_RLCG_REG_NOT_IN_RANGE   0x100
 
 #define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK  0xF
+#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK0xF00
 
 /* all asic after AI use this offset */
 #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
-- 
2.34.1



[PATCH v2] drm/amdgpu: Do not program VF copy regs in mmhub v1.8 under SRIOV (v2)

2023-11-10 Thread Victor Lu
MC_VM_AGP_* registers should not be programmed by guest driver.

v2: move early return outside of loop

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index ea142611be1c..9b0146732e13 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -130,6 +130,9 @@ static void mmhub_v1_8_init_system_aperture_regs(struct 
amdgpu_device *adev)
uint64_t value;
int i;

+   if (amdgpu_sriov_vf(adev))
+   return;
+
inst_mask = adev->aid_mask;
for_each_inst(i, inst_mask) {
/* Program the AGP BAR */
@@ -139,9 +142,6 @@ static void mmhub_v1_8_init_system_aperture_regs(struct 
amdgpu_device *adev)
WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
 adev->gmc.agp_end >> 24);

-   if (amdgpu_sriov_vf(adev))
-   return;
-
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
--
2.34.1



[PATCH v2] drm/amdgpu: Do not program PF-only regs in hdp_v4_0.c under SRIOV (v2)

2023-11-08 Thread Victor Lu
The following regs can only be programmed by the PF:
HDP_MISC_CNTL
HDP_NONSURFACE_BASE
HDP_NONSURFACE_BASE_HI

v2: update commit message

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index 3f3a6445c006..bf3bdf67abb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -145,6 +145,11 @@ static void hdp_v4_0_init_registers(struct amdgpu_device 
*adev)
break;
}
 
+   /* Do not program registers if VF */
+   if (amdgpu_sriov_vf(adev)) {
+   return;
+   }
+
WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
 
if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0))
-- 
2.34.1



[PATCH] drm/amdgpu: Do not program HDP_MISC_CNTL in hdp_v4_0.c under SRIOV

2023-11-07 Thread Victor Lu
This register is blocked for VF access.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index 3f3a6445c006..bf3bdf67abb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -145,6 +145,11 @@ static void hdp_v4_0_init_registers(struct amdgpu_device 
*adev)
break;
}
 
+   /* Do not program registers if VF */
+   if (amdgpu_sriov_vf(adev)) {
+   return;
+   }
+
WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
 
if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0))
-- 
2.34.1



[PATCH] drm/amdgpu: Skip PCTL0_MMHUB_DEEPSLEEP_IB write in jpegv4.0.3 under SRIOV

2023-11-07 Thread Victor Lu
PCTL0_MMHUB_DEEPSLEEP_IB is blocked for VF access

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 355d25fd6169..165448bed6c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -652,9 +652,11 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct 
amdgpu_ring *ring)
  */
 static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
 {
-   amdgpu_ring_write(ring, 
PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
-   0, 0, PACKETJ_TYPE0));
-   amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
+   if (!amdgpu_sriov_vf(ring->adev)) {
+   amdgpu_ring_write(ring, 
PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+   0, 0, PACKETJ_TYPE0));
+   amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
+   }
 
amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
0, 0, PACKETJ_TYPE0));
@@ -670,9 +672,11 @@ static void jpeg_v4_0_3_dec_ring_insert_start(struct 
amdgpu_ring *ring)
  */
 static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
 {
-   amdgpu_ring_write(ring, 
PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
-   0, 0, PACKETJ_TYPE0));
-   amdgpu_ring_write(ring, 0x62a04);
+   if (!amdgpu_sriov_vf(ring->adev)) {
+   amdgpu_ring_write(ring, 
PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+   0, 0, PACKETJ_TYPE0));
+   amdgpu_ring_write(ring, 0x62a04);
+   }
 
amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
0, 0, PACKETJ_TYPE0));
-- 
2.34.1



[PATCH] drm/amdgpu: Do not program VF copy regs in mmhub v1.8 under SRIOV

2023-11-07 Thread Victor Lu
MC_VM_AGP_* registers should not be programmed by guest driver.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index ea142611be1c..125917c3cba1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -132,6 +132,9 @@ static void mmhub_v1_8_init_system_aperture_regs(struct 
amdgpu_device *adev)
 
inst_mask = adev->aid_mask;
for_each_inst(i, inst_mask) {
+   if (amdgpu_sriov_vf(adev))
+   return;
+
/* Program the AGP BAR */
WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BASE, 0);
WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT,
@@ -139,9 +142,6 @@ static void mmhub_v1_8_init_system_aperture_regs(struct 
amdgpu_device *adev)
WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
 adev->gmc.agp_end >> 24);
 
-   if (amdgpu_sriov_vf(adev))
-   return;
-
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
-- 
2.34.1



[PATCH 2/4] drm/amdgpu: Add xcc param to SRIOV kiq write and WREG32_SOC15_IP_NO_KIQ (v4)

2023-11-03 Thread Victor Lu
WREG32/RREG32_SOC15_IP_NO_KIQ and amdgpu_virt_kiq_reg_write_reg_wait
are not using the correct rlcg interface or mec engine, respectively.

Add xcc instance parameter to them.

v4: Use GET_INST and squash commit with:
"drm/amdgpu: Add xcc_inst param to amdgpu_virt_kiq_reg_write_reg_wait"

v3: xcc not needed for MMMHUB

v2: rebase

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  6 +++---
 6 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index a0aa624f5a92..e179f022c428 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
-   uint32_t ref, uint32_t mask)
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst)
 {
-   struct amdgpu_kiq *kiq = >gfx.kiq[0];
+   struct amdgpu_kiq *kiq = >gfx.kiq[xcc_inst];
struct amdgpu_ring *ring = >ring;
signed long r, cnt = 0;
unsigned long flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 858ef21ae515..bb436d41b4ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -334,7 +334,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
-   uint32_t ref, uint32_t mask);
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d8a4fddab9c1..a43d1aa42e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, GET_INST(GC, 0));
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 19eaada35ede..93f100dd5d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -228,7 +228,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, GET_INST(GC, 0));
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3a1050344b59..35ef7529cc8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -817,7 +817,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
 {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
-   u32 j, inv_req, tmp, sem, req, ack;
+   u32 j, inv_req, tmp, sem, req, ack, inst;
const unsigned int eng = 17;
struct amdgpu_vmhub *hub;
 
@@ -832,13 +832,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
/* This is necessary for a HW workaround under SRIOV as well
 * as GFXOFF under bare metal
 */
-   if (adev->gfx.kiq[0].ring.sched.ready &&
+   if (vmhub >= AMDGPU_MMHUB0(0))
+   inst = GET_INST(GC, 0)

[PATCH 4/4] drm/amdgpu: Change WREG32_RLC to WREG32_SOC15_RLC where inst != 0 (v2)

2023-11-03 Thread Victor Lu
W/RREG32_RLC is hardedcoded to use instance 0. W/RREG32_SOC15_RLC
should be used instead when inst != 0.

v2: rebase

Signed-off-by: Victor Lu 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 38 --
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 40 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  2 +-
 3 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 80309d39737a..f6598b9e4faa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -306,8 +306,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_DOORBELL_CONTROL),
-   data);
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, 
data);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -336,27 +335,24 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_LO),
-  lower_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_HI),
-  upper_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR),
-  lower_32_bits((uintptr_t)wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
-   regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO,
+   lower_32_bits(guessed_wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI,
+   upper_32_bits(guessed_wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR,
+   lower_32_bits((uintptr_t)wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
-  queue_id));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1,
+   (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, 
queue_id));
}
 
/* Start the EOP fetcher */
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
-  REG_SET_FIELD(m->cp_hqd_eop_rptr,
-CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR,
+  REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 
1));
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), 
data);
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data);
 
kgd_gfx_v9_release_queue(adev, inst);
 
@@ -494,15 +490,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
VALID,
1);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_high);
+   watch_address_high, inst);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_low);
+   watch_address_low, inst);
 
return watch_address_cntl;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 9285789b3a42..00fbc0f44c92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device 
*adev, uint32_t vmi
 {
kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
 
-  

[PATCH 1/4] drm/amdgpu: Add flag to enable indirect RLCG access for gfx v9.4.3

2023-11-03 Thread Victor Lu
The "rlcg_reg_access_supported" flag is missing. Add it back in.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index a1c2c952d882..ce2a9876369e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1101,6 +1101,7 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct 
amdgpu_device *adev)
reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regGRBM_GFX_INDEX);
reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regRLC_SPARE_INT);
}
+   adev->gfx.rlc.rlcg_reg_access_supported = true;
 }
 
 static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
-- 
2.34.1



[PATCH 3/4] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v5)

2023-11-03 Thread Victor Lu
amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.

Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC
and amdgpu_device_xcc_wreg/rreg to to use the new xcc_id parameter.

Using amdgpu_sriov_runtime to determine whether to access via kiq or
RLC is sufficient for now.

v5: add condition in amdgpu_device_xcc_w/rreg, remove trace func call

v4: avoid using amdgpu_sriov_w/rreg

v3: use W/RREG32_XCC to handle non-kiq case

v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters
of amdgpu_device_wreg/rreg

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 13 ++-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 89 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   |  8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  4 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c   |  8 +-
 9 files changed, 116 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 43c579f5a95e..e8dc75a3ff44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1162,11 +1162,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
 u64 reg_addr, u32 reg_data);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t v,
+   uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 uint32_t reg, uint32_t v, uint32_t xcc_id);
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
@@ -1207,8 +1214,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
 #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)
 
-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
 
 #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
 #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1218,6 +1225,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
+#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, 
inst)
 #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
 #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
 #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 490c8f5ddb60..80309d39737a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_AQL_DISPATCH_ID_HI);
 
for (reg = hqd_base; reg <= hqd_end; reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 51011e8ee90d..9285789b3a42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void 
*mqd,
 
for (reg = hqd_base;
 reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC

[PATCH 3/4] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v4)

2023-11-02 Thread Victor Lu
amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.

Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC
and amdgpu_device_xcc_wreg/rreg to to use the new xcc_id parameter.

Using amdgpu_sriov_runtime to determine whether to access via kiq or
RLC is sufficient for now.

v4: avoid using amdgpu_sriov_w/rreg

v3: use W/RREG32_XCC to handle non-kiq case

v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters
of amdgpu_device_wreg/rreg

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 13 ++-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 91 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   |  8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  4 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c   |  8 +-
 9 files changed, 118 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 43c579f5a95e..e8dc75a3ff44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1162,11 +1162,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
 u64 reg_addr, u32 reg_data);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t v,
+   uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 uint32_t reg, uint32_t v, uint32_t xcc_id);
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
@@ -1207,8 +1214,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
 #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)
 
-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
 
 #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
 #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1218,6 +1225,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
+#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, 
inst)
 #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
 #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
 #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 490c8f5ddb60..80309d39737a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_AQL_DISPATCH_ID_HI);
 
for (reg = hqd_base; reg <= hqd_end; reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 51011e8ee90d..9285789b3a42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void 
*mqd,
 
for (reg = hqd_base;
 reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
 
 
/* Activate doorbel

[PATCH 2/4] drm/amdgpu: Add xcc param to SRIOV kiq write and WREG32_SOC15_IP_NO_KIQ (v4)

2023-11-02 Thread Victor Lu
WREG32/RREG32_SOC15_IP_NO_KIQ and amdgpu_virt_kiq_reg_write_reg_wait
are not using the correct rlcg interface or mec engine, respectively.

Add xcc instance parameter to them.

v4: Use GET_INST and squash commit with:
"drm/amdgpu: Add xcc_inst param to amdgpu_virt_kiq_reg_write_reg_wait"

v3: xcc not needed for MMMHUB

v2: rebase

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  6 +++---
 6 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index a0aa624f5a92..e179f022c428 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
-   uint32_t ref, uint32_t mask)
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst)
 {
-   struct amdgpu_kiq *kiq = >gfx.kiq[0];
+   struct amdgpu_kiq *kiq = >gfx.kiq[xcc_inst];
struct amdgpu_ring *ring = >ring;
signed long r, cnt = 0;
unsigned long flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 858ef21ae515..bb436d41b4ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -334,7 +334,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
-   uint32_t ref, uint32_t mask);
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d8a4fddab9c1..a43d1aa42e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, GET_INST(GC, 0));
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 19eaada35ede..93f100dd5d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -228,7 +228,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, GET_INST(GC, 0));
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3a1050344b59..35ef7529cc8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -817,7 +817,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
 {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
-   u32 j, inv_req, tmp, sem, req, ack;
+   u32 j, inv_req, tmp, sem, req, ack, inst;
const unsigned int eng = 17;
struct amdgpu_vmhub *hub;
 
@@ -832,13 +832,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
/* This is necessary for a HW workaround under SRIOV as well
 * as GFXOFF under bare metal
 */
-   if (adev->gfx.kiq[0].ring.sched.ready &&
+   if (vmhub >= AMDGPU_MMHUB0(0))
+   inst = GET_INST(GC, 0)

[PATCH 4/4] drm/amdgpu: Change WREG32_RLC to WREG32_SOC15_RLC where inst != 0 (v2)

2023-11-02 Thread Victor Lu
W/RREG32_RLC is hardedcoded to use instance 0. W/RREG32_SOC15_RLC
should be used instead when inst != 0.

v2: rebase

Signed-off-by: Victor Lu 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 38 --
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 40 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  2 +-
 3 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 80309d39737a..f6598b9e4faa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -306,8 +306,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_DOORBELL_CONTROL),
-   data);
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, 
data);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -336,27 +335,24 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_LO),
-  lower_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_HI),
-  upper_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR),
-  lower_32_bits((uintptr_t)wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
-   regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO,
+   lower_32_bits(guessed_wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI,
+   upper_32_bits(guessed_wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR,
+   lower_32_bits((uintptr_t)wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
-  queue_id));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1,
+   (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, 
queue_id));
}
 
/* Start the EOP fetcher */
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
-  REG_SET_FIELD(m->cp_hqd_eop_rptr,
-CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR,
+  REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 
1));
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), 
data);
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data);
 
kgd_gfx_v9_release_queue(adev, inst);
 
@@ -494,15 +490,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
VALID,
1);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_high);
+   watch_address_high, inst);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_low);
+   watch_address_low, inst);
 
return watch_address_cntl;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 9285789b3a42..00fbc0f44c92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device 
*adev, uint32_t vmi
 {
kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
 
-  

[PATCH 1/4] drm/amdgpu: Add flag to enable indirect RLCG access for gfx v9.4.3

2023-11-02 Thread Victor Lu
The "rlcg_reg_access_supported" flag is missing. Add it back in.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index a1c2c952d882..ce2a9876369e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1101,6 +1101,7 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct 
amdgpu_device *adev)
reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regGRBM_GFX_INDEX);
reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regRLC_SPARE_INT);
}
+   adev->gfx.rlc.rlcg_reg_access_supported = true;
 }
 
 static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
-- 
2.34.1



[PATCH v4] drm/amdgpu: Add xcc param to SRIOV kiq write and WREG32_SOC15_IP_NO_KIQ (v4)

2023-10-31 Thread Victor Lu
WREG32/RREG32_SOC15_IP_NO_KIQ and amdgpu_virt_kiq_reg_write_reg_wait
are not using the correct rlcg interface or mec engine, respectively.

Add xcc instance parameter to them.

v4: Use GET_INST and squash commit with:
"drm/amdgpu: Add xcc_inst param to amdgpu_virt_kiq_reg_write_reg_wait"

v3: xcc not needed for MMMHUB

v2: rebase

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  6 +++---
 6 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index a0aa624f5a92..e179f022c428 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
-   uint32_t ref, uint32_t mask)
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst)
 {
-   struct amdgpu_kiq *kiq = >gfx.kiq[0];
+   struct amdgpu_kiq *kiq = >gfx.kiq[xcc_inst];
struct amdgpu_ring *ring = >ring;
signed long r, cnt = 0;
unsigned long flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 858ef21ae515..bb436d41b4ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -334,7 +334,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
-   uint32_t ref, uint32_t mask);
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d8a4fddab9c1..a43d1aa42e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, GET_INST(GC, 0));
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 19eaada35ede..93f100dd5d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -228,7 +228,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, GET_INST(GC, 0));
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3a1050344b59..35ef7529cc8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -817,7 +817,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
 {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
-   u32 j, inv_req, tmp, sem, req, ack;
+   u32 j, inv_req, tmp, sem, req, ack, inst;
const unsigned int eng = 17;
struct amdgpu_vmhub *hub;
 
@@ -832,13 +832,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
/* This is necessary for a HW workaround under SRIOV as well
 * as GFXOFF under bare metal
 */
-   if (adev->gfx.kiq[0].ring.sched.ready &&
+   if (vmhub >= AMDGPU_MMHUB0(0))
+   inst = GET_INST(GC, 0)

[PATCH] drm/amdgpu: Change WREG32_RLC to WREG32_SOC15_RLC where inst != 0 (v2)

2023-10-27 Thread Victor Lu
W/RREG32_RLC is hardedcoded to use instance 0. W/RREG32_SOC15_RLC
should be used instead when inst != 0.

v2: rebase

Signed-off-by: Victor Lu 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 38 --
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 40 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  2 +-
 3 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 80309d39737a..f6598b9e4faa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -306,8 +306,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_DOORBELL_CONTROL),
-   data);
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, 
data);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -336,27 +335,24 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_LO),
-  lower_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_HI),
-  upper_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR),
-  lower_32_bits((uintptr_t)wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
-   regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO,
+   lower_32_bits(guessed_wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI,
+   upper_32_bits(guessed_wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR,
+   lower_32_bits((uintptr_t)wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
-  queue_id));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1,
+   (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, 
queue_id));
}
 
/* Start the EOP fetcher */
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
-  REG_SET_FIELD(m->cp_hqd_eop_rptr,
-CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR,
+  REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 
1));
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), 
data);
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data);
 
kgd_gfx_v9_release_queue(adev, inst);
 
@@ -494,15 +490,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
VALID,
1);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_high);
+   watch_address_high, inst);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_low);
+   watch_address_low, inst);
 
return watch_address_cntl;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 9285789b3a42..00fbc0f44c92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device 
*adev, uint32_t vmi
 {
kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
 
-  

[PATCH] drm/amdgpu: Add xcc_inst param to amdgpu_virt_kiq_reg_write_reg_wait (v3)

2023-10-27 Thread Victor Lu
amdgpu_virt_kiq_reg_write_reg_wait is hardcoded to use MEC engine 0.
Add xcc_inst as a parameter to allow it to use different MEC engines.

v3: use first xcc for MMHUB in gmc_v9_0_flush_gpu_tlb

v2: rebase

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 26 ++--
 5 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 7a084fbfd33c..82762c61d3ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
-   uint32_t ref, uint32_t mask)
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst)
 {
-   struct amdgpu_kiq *kiq = >gfx.kiq[0];
+   struct amdgpu_kiq *kiq = >gfx.kiq[xcc_inst];
struct amdgpu_ring *ring = >ring;
signed long r, cnt = 0;
unsigned long flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 03c0e38b8aea..5c64258eb728 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -334,7 +334,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
-   uint32_t ref, uint32_t mask);
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_id);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d8a4fddab9c1..173237e99882 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, 0);
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 19eaada35ede..2e4abb356e38 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -228,7 +228,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, 0);
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 0ab9c554da78..32cc3645f02b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -817,7 +817,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
 {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
-   u32 j, inv_req, tmp, sem, req, ack;
+   u32 j, inv_req, tmp, sem, req, ack, inst;
const unsigned int eng = 17;
struct amdgpu_vmhub *hub;
 
@@ -832,13 +832,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
/* This is necessary for a HW workaround under SRIOV as well
 * as GFXOFF under bare metal
 */
-   if (adev->gfx.kiq[0].ring.sched.ready &&
+   if (vmhub >= AMDGPU_MMHUB0(0))
+   inst = 0;
+   else
+   inst = vmhub;
+   if (adev->gfx.kiq[inst].ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
uint32_t req = hub->vm_inv_eng0_req + hub-&

[PATCH] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v4)

2023-10-27 Thread Victor Lu
amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.

Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC
and amdgpu_device_xcc_wreg/rreg to to use the new xcc_id parameter.

Using amdgpu_sriov_runtime to determine whether to access via kiq or
RLC is sufficient for now.

v4: avoid using amdgpu_sriov_w/rreg

v3: use W/RREG32_XCC to handle non-kiq case

v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters
of amdgpu_device_wreg/rreg

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 13 ++-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 91 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   |  8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  4 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c   |  8 +-
 9 files changed, 118 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 43c579f5a95e..e8dc75a3ff44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1162,11 +1162,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
 u64 reg_addr, u32 reg_data);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t v,
+   uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 uint32_t reg, uint32_t v, uint32_t xcc_id);
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
@@ -1207,8 +1214,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
 #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)

-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)

 #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
 #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1218,6 +1225,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
+#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, 
inst)
 #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
 #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
 #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 490c8f5ddb60..80309d39737a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_AQL_DISPATCH_ID_HI);

for (reg = hqd_base; reg <= hqd_end; reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);


/* Activate doorbell logic before triggering WPTR poll. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 51011e8ee90d..9285789b3a42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void 
*mqd,

for (reg = hqd_base;
 reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);


/* Activate doorbell logic before 

[PATCH] drm/amdgpu: Add xcc instance parameter to *REG32_SOC15_IP_NO_KIQ (v3)

2023-10-27 Thread Victor Lu
The WREG32/RREG32_SOC15_IP_NO_KIQ call is using XCC0's RLCG interface
when programming other XCCs.

Add xcc instance parameter to them.

v3: xcc not needed for MMMHUB

v2: rebase

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3a1050344b59..0ab9c554da78 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -856,9 +856,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acquire */
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, vmhub);
if (tmp & 0x1)
break;
udelay(1);
@@ -869,9 +869,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
}
 
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, 0);
else
-   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, vmhub);
 
/*
 * Issue a dummy read to wait for the ACK register to
@@ -884,9 +884,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
for (j = 0; j < adev->usec_timeout; j++) {
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, 0);
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, vmhub);
if (tmp & (1 << vmid))
break;
udelay(1);
@@ -899,9 +899,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 * write with 0 means semaphore release
 */
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, 0);
else
-   WREG32_SOC15_IP_NO_KIQ(GC, sem, 0);
+   WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, vmhub);
}
 
spin_unlock(>gmc.invalidate_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index da683afa0222..c75e9cd5c98b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -69,7 +69,7 @@
 
 #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0)
 
-#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
 
 #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
@@ -86,8 +86,8 @@
 #define WREG32_SOC15_IP(ip, reg, value) \
 __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0)
 
-#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
-__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value, inst) \
+__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
 
 #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
__WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
-- 
2.34.1



[PATCH 5/5] drm/amdgpu: Change WREG32_RLC to WREG32_SOC15_RLC where inst != 0

2023-10-25 Thread Victor Lu
W/RREG32_RLC is hardedcoded to use instance 0. W/RREG32_SOC15_RLC
should  be used instead when inst != 0.

Signed-off-by: Victor Lu 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 38 --
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 40 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  2 +-
 3 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index c94df54e2657..cac6171528be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -306,8 +306,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_DOORBELL_CONTROL),
-   data);
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, 
data);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -336,27 +335,24 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_LO),
-  lower_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_HI),
-  upper_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR),
-  lower_32_bits((uintptr_t)wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
-   regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO,
+   lower_32_bits(guessed_wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI,
+   upper_32_bits(guessed_wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR,
+   lower_32_bits((uintptr_t)wptr));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
-  queue_id));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1,
+   (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, 
queue_id));
}
 
/* Start the EOP fetcher */
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
-  REG_SET_FIELD(m->cp_hqd_eop_rptr,
-CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR,
+  REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 
1));
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), 
data);
+   WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data);
 
kgd_gfx_v9_release_queue(adev, inst);
 
@@ -494,15 +490,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
VALID,
1);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_high);
+   AMDGPU_REGS_RLC, watch_address_high, inst);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_low);
+   AMDGPU_REGS_RLC, watch_address_low, inst);
 
return watch_address_cntl;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 47c8c334c779..f5ef8f0543fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device 
*adev, uint32_t vmi
 {
kgd_gfx_v9_lock_sr

[PATCH 4/5] drm/amdgpu: Add xcc_inst param to amdgpu_virt_kiq_reg_write_reg_wait (v2)

2023-10-25 Thread Victor Lu
amdgpu_virt_kiq_reg_write_reg_wait is hardcoded to use MEC engine 0.
Add xcc_inst as a parameter to allow it to use different MEC engines.

v2: rebase

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 26 ++--
 5 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index c6c8f4fed0c1..f1a05053155e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
-   uint32_t ref, uint32_t mask)
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst)
 {
-   struct amdgpu_kiq *kiq = >gfx.kiq[0];
+   struct amdgpu_kiq *kiq = >gfx.kiq[xcc_inst];
struct amdgpu_ring *ring = >ring;
signed long r, cnt = 0;
unsigned long flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 858ef21ae515..345b30972213 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -334,7 +334,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
-   uint32_t ref, uint32_t mask);
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_id);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d8a4fddab9c1..173237e99882 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, 0);
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 19eaada35ede..2e4abb356e38 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -228,7 +228,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, 0);
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b7b1b04b66cb..00ef35b72213 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -817,7 +817,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
 {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
-   u32 j, inv_req, tmp, sem, req, ack;
+   u32 j, inv_req, tmp, sem, req, ack, inst;
const unsigned int eng = 17;
struct amdgpu_vmhub *hub;
 
@@ -832,13 +832,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
/* This is necessary for a HW workaround under SRIOV as well
 * as GFXOFF under bare metal
 */
-   if (adev->gfx.kiq[0].ring.sched.ready &&
+   if (vmhub >= AMDGPU_MMHUB0(0))
+   inst = vmhub - AMDGPU_MMHUB0(0);
+   else
+   inst = vmhub;
+   if (adev->gfx.kiq[inst].ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
uint32_t ack = 

[PATCH 3/5] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v3)

2023-10-25 Thread Victor Lu
amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.

Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC
and amdgpu_device_xcc_wreg/rreg to to use the new xcc_id parameter.

v3: use W/RREG32_XCC to handle non-kiq case

v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters
of amdgpu_device_wreg/rreg

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 13 ++-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 84 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c   |  8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h   |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  4 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c   |  8 +-
 8 files changed, 107 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a2e8c2b60857..09989ebb5da3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1168,11 +1168,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
 u64 reg_addr, u32 reg_data);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t v,
+   uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 uint32_t reg, uint32_t v, uint32_t xcc_id);
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
@@ -1213,8 +1220,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
 #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)
 
-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
 
 #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
 #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1224,6 +1231,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_XCC(reg, flag, inst) amdgpu_device_xcc_rreg(adev, (reg), flag, 
inst)
+#define WREG32_XCC(reg, v, flag, inst) amdgpu_device_xcc_wreg(adev, (reg), 
(v), flag, inst)
 #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
 #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
 #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 490c8f5ddb60..c94df54e2657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_AQL_DISPATCH_ID_HI);
 
for (reg = hqd_base; reg <= hqd_end; reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC(reg, mqd_hqd[reg - hqd_base], AMDGPU_REGS_RLC, inst);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 51011e8ee90d..47c8c334c779 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void 
*mqd,
 
for (reg = hqd_base;
 reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_XCC(reg, mqd_hqd[reg - hqd_base], AMDGPU_REGS_RLC, inst);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device

[PATCH 2/5] drm/amdgpu: Add xcc instance parameter to *REG32_SOC15_IP_NO_KIQ (v2)

2023-10-25 Thread Victor Lu
The WREG32/RREG32_SOC15_IP_NO_KIQ call is using XCC0's RLCG interface
when programming other XCCs.

Add xcc instance parameter to them.

v2: rebase

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index fee3141bb607..b7b1b04b66cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -856,9 +856,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acquire */
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, vmhub 
- AMDGPU_MMHUB(0));
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, vmhub);
if (tmp & 0x1)
break;
udelay(1);
@@ -869,9 +869,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
}
 
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, vmhub - 
AMDGPU_MMHUB(0));
else
-   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, vmhub);
 
/*
 * Issue a dummy read to wait for the ACK register to
@@ -884,9 +884,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
for (j = 0; j < adev->usec_timeout; j++) {
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, vmhub - 
AMDGPU_MMHUB(0));
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, vmhub);
if (tmp & (1 << vmid))
break;
udelay(1);
@@ -899,9 +899,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 * write with 0 means semaphore release
 */
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, vmhub - 
AMDGPU_MMHUB(0));
else
-   WREG32_SOC15_IP_NO_KIQ(GC, sem, 0);
+   WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, vmhub);
}
 
spin_unlock(>gmc.invalidate_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index da683afa0222..c75e9cd5c98b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -69,7 +69,7 @@
 
 #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0)
 
-#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
 
 #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
@@ -86,8 +86,8 @@
 #define WREG32_SOC15_IP(ip, reg, value) \
 __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0)
 
-#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
-__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value, inst) \
+__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
 
 #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
__WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
-- 
2.34.1



[PATCH 1/5] drm/amdgpu: Add flag to enable indirect RLCG access for gfx v9.4.3

2023-10-25 Thread Victor Lu
The "rlcg_reg_access_supported" flag is missing. Add it back in.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 41bbabd9ad4d..386804f2e95c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1102,6 +1102,7 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct 
amdgpu_device *adev)
reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regGRBM_GFX_INDEX);
reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regRLC_SPARE_INT);
}
+   adev->gfx.rlc.rlcg_reg_access_supported = true;
 }
 
 static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
-- 
2.34.1



[PATCH 5/6] drm/amdgpu: Add new macro WREG32_RLC_XCC (v2)

2023-10-03 Thread Victor Lu
WREG32_RLC does not specify the correct XCC so the RLCG interface does
not work.

Define WREG32_RLC_XCC to be like WREG32_RLC but include a parameter to
specify the XCC.

v2: Add new macro WREG32_RLC_XCC instead of modifying exiting WREG32_RLC
macro

Original v1 title: "drm/amdgpu: Add xcc instance parameter to *REG32_RLC"

Signed-off-by: Victor Lu 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 41 +--
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 34 +++
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  8 +++-
 3 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 490c8f5ddb60..084dbc41b579 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -300,14 +300,14 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_AQL_DISPATCH_ID_HI);
 
for (reg = hqd_base; reg <= hqd_end; reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_RLC_XCC(reg, mqd_hqd[reg - hqd_base], inst);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_DOORBELL_CONTROL),
-   data);
+   WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_DOORBELL_CONTROL),
+   data, inst);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -336,27 +336,26 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_LO),
-  lower_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_HI),
-  upper_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR),
-  lower_32_bits((uintptr_t)wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_LO),
+  lower_32_bits(guessed_wptr), inst);
+   WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_HI),
+  upper_32_bits(guessed_wptr), inst);
+   WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR),
+  lower_32_bits((uintptr_t)wptr), inst);
+   WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
-   upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
-  queue_id));
+   upper_32_bits((uintptr_t)wptr), inst);
+   WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1),
+  (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, 
queue_id), inst);
}
 
/* Start the EOP fetcher */
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
+   WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_EOP_RPTR),
   REG_SET_FIELD(m->cp_hqd_eop_rptr,
-CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+CP_HQD_EOP_RPTR, INIT_FETCHER, 1), inst);
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), 
data);
+   WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_ACTIVE), data, inst);
 
kgd_gfx_v9_release_queue(adev, inst);
 
@@ -494,15 +493,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
VALID,
1);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_RLC_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_high);
+   watch_address_high, inst);
 
-   WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+   WREG32_RLC_XCC((

[PATCH 6/6] drm/amdgpu: Use *REG32_RLC_XCC in gfx_v9_4_3.c (v2)

2023-10-03 Thread Victor Lu
gfx_v9_4_3_xcc_set_compute_eop_interrupt_state should use
*REG32_RLC_XCC so CP_ME1_PIPE0_INT_CNTL registers can be accessed
under SRIOV.

v2: Use RREG32_RLC_XCC

Original v1 title: "drm/amdgpu: Use *REG32_RLC in gfx_v9_4_3.c"

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index e7543bc49ee5..c010e80adb99 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -2739,16 +2739,16 @@ static void 
gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
 
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
-   mec_int_cntl = RREG32(mec_int_cntl_reg);
+   mec_int_cntl = RREG32_RLC_XCC(mec_int_cntl_reg, xcc_id);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, 
CP_ME1_PIPE0_INT_CNTL,
 TIME_STAMP_INT_ENABLE, 0);
-   WREG32(mec_int_cntl_reg, mec_int_cntl);
+   WREG32_RLC_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
break;
case AMDGPU_IRQ_STATE_ENABLE:
-   mec_int_cntl = RREG32(mec_int_cntl_reg);
+   mec_int_cntl = RREG32_RLC_XCC(mec_int_cntl_reg, xcc_id);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, 
CP_ME1_PIPE0_INT_CNTL,
 TIME_STAMP_INT_ENABLE, 1);
-   WREG32(mec_int_cntl_reg, mec_int_cntl);
+   WREG32_RLC_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
break;
default:
break;
-- 
2.34.1



[PATCH 4/6] drm/amdgpu: Add xcc_inst param to amdgpu_virt_kiq_reg_write_reg_wait (v2)

2023-10-03 Thread Victor Lu
amdgpu_virt_kiq_reg_write_reg_wait is hardcoded to use MEC engine 0.
Add xcc_inst as a parameter to allow it to use different MEC engines.

v2: rebase

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 26 ++--
 5 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 786ce1eb75b6..5007cd4c2262 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
-   uint32_t ref, uint32_t mask)
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst)
 {
-   struct amdgpu_kiq *kiq = >gfx.kiq[0];
+   struct amdgpu_kiq *kiq = >gfx.kiq[xcc_inst];
struct amdgpu_ring *ring = >ring;
signed long r, cnt = 0;
unsigned long flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index fabb83e9d9ae..e9eb64e11c9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -330,7 +330,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
-   uint32_t ref, uint32_t mask);
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_id);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 70370b412d24..fc93ef4341f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -265,7 +265,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, 0);
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index d0a9ee2f12d3..30bdd1966561 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -225,7 +225,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, 0);
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 24f6e070603e..930cc32b885b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -814,7 +814,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
 {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
-   u32 j, inv_req, tmp, sem, req, ack;
+   u32 j, inv_req, tmp, sem, req, ack, inst;
const unsigned int eng = 17;
struct amdgpu_vmhub *hub;
 
@@ -829,13 +829,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
/* This is necessary for a HW workaround under SRIOV as well
 * as GFXOFF under bare metal
 */
-   if (adev->gfx.kiq[0].ring.sched.ready &&
+   if (vmhub >= AMDGPU_MMHUB0(0))
+   inst = vmhub - AMDGPU_MMHUB0(0);
+   else
+   inst = vmhub;
+   if (adev->gfx.kiq[inst].ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
uint32_t ack = 

[PATCH 3/6] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v2)

2023-10-03 Thread Victor Lu
amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.

Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC
and amdgpu_device_xcc_wreg/rreg to to use the new xcc_id parameter.

v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters
of amdgpu_device_wreg/rreg

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 13 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 76 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c|  8 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h|  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   |  4 +-
 5 files changed, 93 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 73e825d20259..387ef7e2419f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1161,11 +1161,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
 u64 reg_addr, u32 reg_data);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t v,
+   uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 uint32_t reg, uint32_t v, uint32_t xcc_id);
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
@@ -1206,8 +1213,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
 #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)
 
-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
 
 #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
 #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1217,6 +1224,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
+#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, 
inst)
 #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
 #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
 #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bad2b5577e96..96d1937a1d53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -405,7 +405,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
down_read_trylock(>reset_domain->sem)) {
-   ret = amdgpu_kiq_rreg(adev, reg);
+   ret = amdgpu_kiq_rreg(adev, reg, 0);
up_read(>reset_domain->sem);
} else {
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
@@ -442,6 +442,44 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, 
uint32_t offset)
BUG();
 }
 
+
+/**
+ * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id)
+{
+   uint32_t ret;
+
+   if (amdgpu_device_skip_hw_access(adev))
+   return 0;
+
+   if ((reg * 4) < adev->rmmio_size) {
+   if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+   amdgpu_sriov_runtime(adev) &&
+   down_read_tryl

[PATCH 2/6] drm/amdgpu: Add xcc instance parameter to *REG32_SOC15_IP_NO_KIQ (v2)

2023-10-03 Thread Victor Lu
The WREG32/RREG32_SOC15_IP_NO_KIQ call is using XCC0's RLCG interface
when programming other XCCs.

Add xcc instance parameter to them.

v2: rebase

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6b15677c0314..24f6e070603e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -853,9 +853,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acquire */
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, vmhub 
- AMDGPU_MMHUB(0));
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, vmhub);
if (tmp & 0x1)
break;
udelay(1);
@@ -866,9 +866,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
}
 
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, vmhub - 
AMDGPU_MMHUB(0));
else
-   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, vmhub);
 
/*
 * Issue a dummy read to wait for the ACK register to
@@ -881,9 +881,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
for (j = 0; j < adev->usec_timeout; j++) {
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, vmhub - 
AMDGPU_MMHUB(0));
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, vmhub);
if (tmp & (1 << vmid))
break;
udelay(1);
@@ -896,9 +896,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 * write with 0 means semaphore release
 */
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, vmhub - 
AMDGPU_MMHUB(0));
else
-   WREG32_SOC15_IP_NO_KIQ(GC, sem, 0);
+   WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, vmhub);
}
 
spin_unlock(>gmc.invalidate_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index da683afa0222..c75e9cd5c98b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -69,7 +69,7 @@
 
 #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0)
 
-#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
 
 #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
@@ -86,8 +86,8 @@
 #define WREG32_SOC15_IP(ip, reg, value) \
 __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0)
 
-#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
-__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value, inst) \
+__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
 
 #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
__WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
-- 
2.34.1



[PATCH 1/6] drm/amdgpu: Add flag to enable indirect RLCG access for gfx v9.4.3

2023-10-03 Thread Victor Lu
The "rlcg_reg_access_supported" flag is missing. Add it back in.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index fbfe0a1c4b19..e7543bc49ee5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1102,6 +1102,7 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct 
amdgpu_device *adev)
reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regGRBM_GFX_INDEX);
reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regRLC_SPARE_INT);
}
+   adev->gfx.rlc.rlcg_reg_access_supported = true;
 }
 
 static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
-- 
2.34.1



[PATCH] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3

2023-08-29 Thread Victor Lu
amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.
Add an "xcc_id" parameter to them so its uses the correct XCD's engine

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 24 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c|  8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h|  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   |  4 ++--
 5 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4de074243c4d..798248677849 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1136,12 +1136,14 @@ uint32_t amdgpu_device_wait_on_rreg(struct 
amdgpu_device *adev,
uint32_t inst, uint32_t reg_addr, char reg_name[],
uint32_t expected_value, uint32_t mask);
 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
-   uint32_t reg, uint32_t acc_flags);
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id);
 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
 void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
-   uint32_t acc_flags);
+   uint32_t acc_flags,
+   uint32_t xcc_id);
 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
 u64 reg_addr, u32 reg_data);
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
@@ -1177,18 +1179,20 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define AMDGPU_REGS_NO_KIQ(1<<1)
 #define AMDGPU_REGS_RLC(1<<2)

-#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
-#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ)
+#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ, 
0)
+#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), 
AMDGPU_REGS_NO_KIQ, 0)

-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)

 #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
 #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))

-#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0)
-#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", 
amdgpu_device_rreg(adev, (reg), 0))
-#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
+#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0, 0)
+#define RREG32_XCC(reg, xcc) amdgpu_device_rreg(adev, (reg), 0, xcc)
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", 
amdgpu_device_rreg(adev, (reg), 0, 0))
+#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0, 0)
+#define WREG32_XCC(reg, v, xcc) amdgpu_device_wreg(adev, (reg), (v), 0, xcc)
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
@@ -1236,7 +1240,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
WREG32_SMC(_Reg, tmp);  \
} while (0)

-#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", 
amdgpu_device_rreg((adev), (reg), false))
+#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", 
amdgpu_device_rreg((adev), (reg), false, 0))

 #define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
 #define REG_FIELD_MASK(reg, field) reg##__##field##_MASK
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 533daba2accb..7290e79f7584 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -394,7 +394,8 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device 
*adev)
  * Returns the 32 bit value from the offset specified.
  */
 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
-   uint32_t reg, uint32_t acc_flags)
+   uint32_t reg, uint32_t acc_flags,
+   uint32_t xcc_id)
 {
uint32_t ret;

@@ -405,7 +406,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
down_read_trylock(>reset_domain->sem)) {
-   ret = amdgpu_kiq_rreg(adev, reg);
+   ret = amdgpu_kiq

[PATCH 2/2] drm/amdgpu: Use *REG32_RLC in gfx_v9_4_3.c

2023-08-08 Thread Victor Lu
gfx_v9_4_3_xcc_set_compute_eop_interrupt_state should use *REG32_RLC
so CP_ME1_PIPE0_INT_CNTL registers can be accessed with SRIOV.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 370660b2f843..8b779a103400 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -2742,16 +2742,16 @@ static void 
gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
 
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
-   mec_int_cntl = RREG32(mec_int_cntl_reg);
+   mec_int_cntl = RREG32_RLC(mec_int_cntl_reg, xcc_id);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, 
CP_ME1_PIPE0_INT_CNTL,
 TIME_STAMP_INT_ENABLE, 0);
-   WREG32(mec_int_cntl_reg, mec_int_cntl);
+   WREG32_RLC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
break;
case AMDGPU_IRQ_STATE_ENABLE:
-   mec_int_cntl = RREG32(mec_int_cntl_reg);
+   mec_int_cntl = RREG32_RLC(mec_int_cntl_reg, xcc_id);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, 
CP_ME1_PIPE0_INT_CNTL,
 TIME_STAMP_INT_ENABLE, 1);
-   WREG32(mec_int_cntl_reg, mec_int_cntl);
+   WREG32_RLC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
break;
default:
break;
-- 
2.34.1



[PATCH 1/2] drm/amdgpu: Add xcc instance parameter to *REG32_RLC

2023-08-08 Thread Victor Lu
WREG32/RREG32_RLC should specify the instance so the correct XCC's RLCG
interface can be used.

Signed-off-by: Victor Lu 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  4 +--
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 25 +++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|  4 +--
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 32 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  6 ++--
 drivers/gpu/drm/amd/amdgpu/soc15.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  8 ++---
 7 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index e2fed6edbdd0..4b6007de24ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -154,11 +154,11 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
 
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_high);
+   watch_address_high, 0);
 
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_low);
+   watch_address_low, 0);
 
return watch_address_cntl;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 490c8f5ddb60..218226c10b93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -300,14 +300,14 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_AQL_DISPATCH_ID_HI);
 
for (reg = hqd_base; reg <= hqd_end; reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32_RLC(reg, mqd_hqd[reg - hqd_base], inst);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_DOORBELL_CONTROL),
-   data);
+   data, inst);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -337,26 +337,25 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_LO),
-  lower_32_bits(guessed_wptr));
+  lower_32_bits(guessed_wptr), inst);
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_HI),
-  upper_32_bits(guessed_wptr));
+  upper_32_bits(guessed_wptr), inst);
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_HQD_PQ_WPTR_POLL_ADDR),
-  lower_32_bits((uintptr_t)wptr));
+  lower_32_bits((uintptr_t)wptr), inst);
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
-   upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
-  queue_id));
+   upper_32_bits((uintptr_t)wptr), inst);
+   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 
regCP_PQ_WPTR_POLL_CNTL1),
+  (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, 
queue_id), inst);
}
 
/* Start the EOP fetcher */
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
   REG_SET_FIELD(m->cp_hqd_eop_rptr,
-CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+CP_HQD_EOP_RPTR, INIT_FETCHER, 1), inst);
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), 
data);
+   WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), 
data, inst);
 
kgd_gfx_v9_release_queue(adev, inst);
 
@@ -497,12 +496,12 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
-   watch_address_high);
+   watch_address_high, inst);
 
WREG32_RLC((SOC15_REG_OFFSET(

[PATCH 3/3] drm/amdgpu: Add flag to enable indirect RLCG access for gfx v9.4.3

2023-08-08 Thread Victor Lu
The "rlcg_reg_access_supported" flag is missing. Add it back in.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index a8178d871795..8b779a103400 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1096,6 +1096,7 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct 
amdgpu_device *adev)
reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regGRBM_GFX_INDEX);
reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, 
xcc_id), regRLC_SPARE_INT);
}
+   adev->gfx.rlc.rlcg_reg_access_supported = true;
 }
 
 static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
-- 
2.34.1



[PATCH 2/3] drm/amdgpu: Add xcc_inst param to amdgpu_virt_kiq_reg_write_reg_wait

2023-08-08 Thread Victor Lu
amdgpu_virt_kiq_reg_write_reg_wait is hardcoded to use MEC engine 0.
Add xcc_inst as a parameter to allow it to use different MEC engines.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 10 +++---
 5 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 96857ae7fb5b..105a1fdff2f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
-   uint32_t ref, uint32_t mask)
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_inst)
 {
-   struct amdgpu_kiq *kiq = >gfx.kiq[0];
+   struct amdgpu_kiq *kiq = >gfx.kiq[xcc_inst];
struct amdgpu_ring *ring = >ring;
signed long r, cnt = 0;
unsigned long flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index fabb83e9d9ae..e9eb64e11c9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -330,7 +330,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
-   uint32_t ref, uint32_t mask);
+   uint32_t ref, uint32_t mask,
+   uint32_t xcc_id);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index fa87a85e1017..56f50abcf9e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -346,7 +346,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
 
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, 0);
 
up_read(>reset_domain->sem);
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index e3b76fd28d15..3f6626f8c298 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -303,7 +303,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
 
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-   1 << vmid);
+   1 << vmid, 0);
return;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 11bad5475b31..5b040011bb92 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -815,7 +815,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
const unsigned int eng = 17;
-   u32 j, inv_req, inv_req2, tmp;
+   u32 j, inv_req, inv_req2, tmp, inst;
struct amdgpu_vmhub *hub;
 
BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
@@ -844,14 +844,18 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
/* This is necessary for a HW workaround under SRIOV as well
 * as GFXOFF under bare metal
 */
-   if (adev->gfx.kiq[0].ring.sched.ready &&
+   if (vmhub >= AMDGPU_MMHUB0(0))
+   inst = vmhub - AMDGPU_MMHUB0(0);
+   else
+   inst = vmhub;
+   if (adev->gfx.kiq[inst].ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
down_read_trylock(>reset_domain->sem)) {
uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
 
amdgpu_virt_kiq

[PATCH 1/3] drm/amdgpu: Add xcc instance parameter to *REG32_SOC15_IP_NO_KIQ

2023-08-08 Thread Victor Lu
The WREG32/RREG32_SOC15_IP_NO_KIQ call is using XCC0's RLCG interface
when programming other XCCs.

Add inst parameter to them.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 880460cd3239..11bad5475b31 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -870,9 +870,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acquire */
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng, vmhub - AMDGPU_MMHUB0(0));
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng, vmhub);
if (tmp & 0x1)
break;
udelay(1);
@@ -884,9 +884,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
do {
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req, vmhub - AMDGPU_MMHUB0(0));
else
-   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req, vmhub);
 
/*
 * Issue a dummy read to wait for the ACK register to
@@ -900,9 +900,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 
for (j = 0; j < adev->usec_timeout; j++) {
if (vmhub >= AMDGPU_MMHUB0(0))
-   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng, vmhub - AMDGPU_MMHUB0(0));
else
-   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng, vmhub);
if (tmp & (1 << vmid))
break;
udelay(1);
@@ -919,9 +919,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 * write with 0 means semaphore release
 */
if (vmhub >= AMDGPU_MMHUB0(0))
-   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + 
hub->eng_distance * eng, 0);
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + 
hub->eng_distance * eng, 0, vmhub - AMDGPU_MMHUB0(0));
else
-   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + 
hub->eng_distance * eng, 0);
+   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + 
hub->eng_distance * eng, 0, vmhub);
}
 
spin_unlock(>gmc.invalidate_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index b99244a53d6d..148eb2603b9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -69,7 +69,7 @@
 
 #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0)
 
-#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
 
 #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
@@ -86,8 +86,8 @@
 #define WREG32_SOC15_IP(ip, reg, value) \
 __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0)
 
-#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
-__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value

[PATCH v2] drm/amdgpu: Fix infinite loop in gfxhub_v1_2_xcc_gart_enable (v2)

2023-07-19 Thread Victor Lu
An instance of for_each_inst() was not changed to match its new
behaviour and is causing a loop.

v2: remove tmp_mask variable

Fixes: 50c1d81d6365 ("drm/amdgpu: Modify for_each_inst macro")
Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c |  5 +-
 scripts/package/builddeb | 60 
 2 files changed, 31 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 47f95ec218a3..dbdee1a0aefe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -402,18 +402,15 @@ static void gfxhub_v1_2_xcc_program_invalidation(struct 
amdgpu_device *adev,
 static int gfxhub_v1_2_xcc_gart_enable(struct amdgpu_device *adev,
   uint32_t xcc_mask)
 {
-   uint32_t tmp_mask;
int i;
 
-   tmp_mask = xcc_mask;
/*
 * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
 * VF copy registers so vbios post doesn't program them, for
 * SRIOV driver need to program them
 */
if (amdgpu_sriov_vf(adev)) {
-   for_each_inst(i, tmp_mask) {
-   i = ffs(tmp_mask) - 1;
+   for_each_inst(i, xcc_mask) {
WREG32_SOC15_RLC(GC, GET_INST(GC, i), 
regMC_VM_FB_LOCATION_BASE,
 adev->gmc.vram_start >> 24);
WREG32_SOC15_RLC(GC, GET_INST(GC, i), 
regMC_VM_FB_LOCATION_TOP,
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 67cd420dcf89..c5508054bfc8 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -164,23 +164,23 @@ if is_enabled CONFIG_MODULES; then
mv "$tmpdir/lib/modules/$version"/* 
"$tmpdir/usr/lib/uml/modules/$version/"
rmdir "$tmpdir/lib/modules/$version"
fi
-   if [ -n "$BUILD_DEBUG" ] ; then
-   for module in $(find $tmpdir/lib/modules/ -name *.ko -printf 
'%P\n'); do
-   module=lib/modules/$module
-   mkdir -p $(dirname $dbg_dir/usr/lib/debug/$module)
-   # only keep debug symbols in the debug file
-   $OBJCOPY --only-keep-debug $tmpdir/$module 
$dbg_dir/usr/lib/debug/$module
-   # strip original module from debug symbols
-   $OBJCOPY --strip-debug $tmpdir/$module
-   # then add a link to those
-   $OBJCOPY 
--add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $tmpdir/$module
-   done
-
-   # resign stripped modules
-   if is_enabled CONFIG_MODULE_SIG_ALL; then
-   INSTALL_MOD_PATH="$tmpdir" $MAKE -f $srctree/Makefile 
modules_sign
-   fi
-   fi
+   #if [ -n "$BUILD_DEBUG" ] ; then
+   #   for module in $(find $tmpdir/lib/modules/ -name *.ko -printf 
'%P\n'); do
+   #   module=lib/modules/$module
+   #   mkdir -p $(dirname $dbg_dir/usr/lib/debug/$module)
+   #   # only keep debug symbols in the debug file
+   #   $OBJCOPY --only-keep-debug $tmpdir/$module 
$dbg_dir/usr/lib/debug/$module
+   #   # strip original module from debug symbols
+   #   $OBJCOPY --strip-debug $tmpdir/$module
+   #   # then add a link to those
+   #   $OBJCOPY 
--add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $tmpdir/$module
+   #   done
+
+   #   # resign stripped modules
+   #   if is_enabled CONFIG_MODULE_SIG_ALL; then
+   #   INSTALL_MOD_PATH="$tmpdir" $MAKE -f $srctree/Makefile 
modules_sign
+   #   fi
+   #fi
 fi
 
 # Install the maintainer scripts
@@ -220,18 +220,18 @@ fi
 
 create_package "$packagename" "$tmpdir"
 
-if [ -n "$BUILD_DEBUG" ] ; then
-   # Build debug package
-   # Different tools want the image in different locations
-   # perf
-   mkdir -p $dbg_dir/usr/lib/debug/lib/modules/$version/
-   cp vmlinux $dbg_dir/usr/lib/debug/lib/modules/$version/
-   # systemtap
-   mkdir -p $dbg_dir/usr/lib/debug/boot/
-   ln -s ../lib/modules/$version/vmlinux 
$dbg_dir/usr/lib/debug/boot/vmlinux-$version
-   # kdump-tools
-   ln -s lib/modules/$version/vmlinux 
$dbg_dir/usr/lib/debug/vmlinux-$version
-   create_package "$dbg_packagename" "$dbg_dir"
-fi
+#if [ -n "$BUILD_DEBUG" ] ; then
+#  # Build debug package
+#  # Different tools want the image in different locations
+#  # perf
+#  mkdir -p $dbg_dir/usr/lib/debug/lib/modules/$version/
+#  cp vmlinux $dbg_dir/usr/lib/debug/lib/modules/$version/
+#  # systemtap
+#  mkdir -p $dbg

[PATCH] drm/amdgpu: Fix infinite loop in gfxhub_v1_2_xcc_gart_enable

2023-07-18 Thread Victor Lu
An instance of for_each_inst() was not changed to match its new
behaviour and is causing a loop.

Fixes: 50c1d81d6365 ("drm/amdgpu: Modify for_each_inst macro")
Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 47f95ec218a3..b74df0e9fb08 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -413,7 +413,6 @@ static int gfxhub_v1_2_xcc_gart_enable(struct amdgpu_device 
*adev,
 */
if (amdgpu_sriov_vf(adev)) {
for_each_inst(i, tmp_mask) {
-   i = ffs(tmp_mask) - 1;
WREG32_SOC15_RLC(GC, GET_INST(GC, i), 
regMC_VM_FB_LOCATION_BASE,
 adev->gmc.vram_start >> 24);
WREG32_SOC15_RLC(GC, GET_INST(GC, i), 
regMC_VM_FB_LOCATION_TOP,
-- 
2.34.1



[PATCH v3] drm/amdgpu: Add RLCG interface driver implementation for gfx v9.4.3 (v3)

2023-07-11 Thread Victor Lu
Add RLCG interface support for gfx v9.4.3 and multiple XCCs.
Do not enable it yet.

v2: Fix amdgpu_rlcg_reg_access_ctrl init, add support for multiple XCCs
in amdgpu_mm_wreg_mmio_rlc

v3: Use GET_INST() when indexing amdgpu_rlcg_reg_access_ctrl

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c| 17 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h|  4 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 22 +++
 drivers/gpu/drm/amd/amdgpu/soc15_common.h   | 66 ++---
 11 files changed, 81 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c578e07fd90a..a6647a1d13e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1182,7 +1182,7 @@ void amdgpu_device_indirect_wreg_ext(struct amdgpu_device 
*adev,
 u32 pcie_index, u32 pcie_index_hi,
 u32 pcie_data, u64 reg_addr, u32 reg_data);
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
-uint32_t reg, uint32_t v);
+uint32_t reg, uint32_t v, uint32_t xcc_id);
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 241d8c5da273..fc77bea72db7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -173,7 +173,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct 
file *f,
} else {
r = get_user(value, (uint32_t *)buf);
if (!r)
-   amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value);
+   amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 
0);
}
if (r) {
result = r;
@@ -301,7 +301,7 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char 
__user *buf, u32 off
} else {
r = get_user(value, (uint32_t *)buf);
if (!r)
-   amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, 
value);
+   amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, 
value, rd->id.xcc_id);
}
if (r) {
result = r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 45b335c766fd..abc56085b136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -586,7 +586,8 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
  * this function is invoked only for the debugfs register access
  */
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
-uint32_t reg, uint32_t v)
+uint32_t reg, uint32_t v,
+uint32_t xcc_id)
 {
if (amdgpu_device_skip_hw_access(adev))
return;
@@ -595,7 +596,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
adev->gfx.rlc.funcs &&
adev->gfx.rlc.funcs->is_rlcg_access_range) {
if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
-   return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
+   return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
} else if ((reg * 4) >= adev->rmmio_size) {
adev->pcie_wreg(adev, reg * 4, v);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 80b263646966..b591d33af264 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -26,6 +26,8 @@
 
 #include "clearstate_defs.h"
 
+#define AMDGPU_MAX_RLC_INSTANCES   8
+
 /* firmware ID used in rlc toc */
 typedef enum _FIRMWARE_ID_ {
FIRMWARE_ID_INVALID = 0,
@@ -201,7 +203,7 @@ struct amdgpu_rlc {
u32 cp_table_size;
 
/* safe mode for updating CG/PG state */
-   bool in_safe_mode[8];
+   bool in_safe_mode[AMDGPU_MAX_RLC_INSTANCES];
const struct amdgpu_rlc_funcs *funcs;
 
/* for firmware data */
@@ -257,7 +259,7 @@ struct amdgpu_rlc {
 
bool rlcg_re

[PATCH v2] drm/amdgpu: Add RLCG interface driver implementation for gfx v9.4.3 (v2)

2023-07-06 Thread Victor Lu
Add RLCG interface support for gfx v9.4.3 and multiple XCCs.
Do not enable it yet.

v2: Fix amdgpu_rlcg_reg_access_ctrl init, add support for multiple XCCs
in amdgpu_mm_wreg_mmio_rlc

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c| 17 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h|  4 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 22 +++
 drivers/gpu/drm/amd/amdgpu/soc15_common.h   | 66 ++---
 11 files changed, 81 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c578e07fd90a..a6647a1d13e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1182,7 +1182,7 @@ void amdgpu_device_indirect_wreg_ext(struct amdgpu_device 
*adev,
 u32 pcie_index, u32 pcie_index_hi,
 u32 pcie_data, u64 reg_addr, u32 reg_data);
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
-uint32_t reg, uint32_t v);
+uint32_t reg, uint32_t v, uint32_t xcc_id);
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t 
value);
 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 241d8c5da273..fc77bea72db7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -173,7 +173,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct 
file *f,
} else {
r = get_user(value, (uint32_t *)buf);
if (!r)
-   amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value);
+   amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 
0);
}
if (r) {
result = r;
@@ -301,7 +301,7 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char 
__user *buf, u32 off
} else {
r = get_user(value, (uint32_t *)buf);
if (!r)
-   amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, 
value);
+   amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, 
value, rd->id.xcc_id);
}
if (r) {
result = r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 45b335c766fd..abc56085b136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -586,7 +586,8 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
  * this function is invoked only for the debugfs register access
  */
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
-uint32_t reg, uint32_t v)
+uint32_t reg, uint32_t v,
+uint32_t xcc_id)
 {
if (amdgpu_device_skip_hw_access(adev))
return;
@@ -595,7 +596,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
adev->gfx.rlc.funcs &&
adev->gfx.rlc.funcs->is_rlcg_access_range) {
if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
-   return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
+   return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
} else if ((reg * 4) >= adev->rmmio_size) {
adev->pcie_wreg(adev, reg * 4, v);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 80b263646966..b591d33af264 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -26,6 +26,8 @@
 
 #include "clearstate_defs.h"
 
+#define AMDGPU_MAX_RLC_INSTANCES   8
+
 /* firmware ID used in rlc toc */
 typedef enum _FIRMWARE_ID_ {
FIRMWARE_ID_INVALID = 0,
@@ -201,7 +203,7 @@ struct amdgpu_rlc {
u32 cp_table_size;
 
/* safe mode for updating CG/PG state */
-   bool in_safe_mode[8];
+   bool in_safe_mode[AMDGPU_MAX_RLC_INSTANCES];
const struct amdgpu_rlc_funcs *funcs;
 
/* for firmware data */
@@ -257,7 +259,7 @@ struct amdgpu_rlc {
 
bool rlcg_reg_access_supported;
/* registers for rlcg indirect reg

[PATCH] drm/amdgpu: Add RLCG interface driver implementation for gfx v9.4.3

2023-06-30 Thread Victor Lu
Add RLCG interface support for gfx v9.4.3 and multiple XCCs.
Do not enable it yet.

Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 17 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  4 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c| 21 +++
 drivers/gpu/drm/amd/amdgpu/soc15_common.h  | 66 +++---
 9 files changed, 73 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 45b335c766fd..ce9a0b5ed24f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -591,11 +591,12 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
if (amdgpu_device_skip_hw_access(adev))
return;
 
+   /* TODO: Add support for different XCCs */
if (amdgpu_sriov_fullaccess(adev) &&
adev->gfx.rlc.funcs &&
adev->gfx.rlc.funcs->is_rlcg_access_range) {
if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
-   return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
+   return amdgpu_sriov_wreg(adev, reg, v, 0, 0, 0);
} else if ((reg * 4) >= adev->rmmio_size) {
adev->pcie_wreg(adev, reg * 4, v);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 80b263646966..efabcf6add1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -257,7 +257,7 @@ struct amdgpu_rlc {
 
bool rlcg_reg_access_supported;
/* registers for rlcg indirect reg access */
-   struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl;
+   struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl[8];
 };
 
 void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 5731829964c2..b5c6c7435551 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -960,7 +960,7 @@ static bool amdgpu_virt_get_rlcg_reg_access_flag(struct 
amdgpu_device *adev,
return ret;
 }
 
-static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 
v, u32 flag)
+static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 
v, u32 flag, u32 xcc_id)
 {
struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
uint32_t timeout = 5;
@@ -978,7 +978,12 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device 
*adev, u32 offset, u32 v
return 0;
}
 
-   reg_access_ctrl = >gfx.rlc.reg_access_ctrl;
+   if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) {
+   dev_err(adev->dev, "invalid xcc\n");
+   return 0;
+   }
+
+   reg_access_ctrl = >gfx.rlc.reg_access_ctrl[xcc_id];
scratch_reg0 = (void __iomem *)adev->rmmio + 4 * 
reg_access_ctrl->scratch_reg0;
scratch_reg1 = (void __iomem *)adev->rmmio + 4 * 
reg_access_ctrl->scratch_reg1;
scratch_reg2 = (void __iomem *)adev->rmmio + 4 * 
reg_access_ctrl->scratch_reg2;
@@ -1043,13 +1048,13 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device 
*adev, u32 offset, u32 v
 
 void amdgpu_sriov_wreg(struct amdgpu_device *adev,
   u32 offset, u32 value,
-  u32 acc_flags, u32 hwip)
+  u32 acc_flags, u32 hwip, u32 xcc_id)
 {
u32 rlcg_flag;
 
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, 
true, _flag)) {
-   amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag);
+   amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id);
return;
}
 
@@ -1060,13 +1065,13 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev,
 }
 
 u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
- u32 offset, u32 acc_flags, u32 hwip)
+ u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id)
 {
u32 rlcg_flag;
 
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, 
false, _flag))
-   return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag);
+   return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, 
xcc_id);
 
if (acc_flags & AMDGPU_REGS_NO_KIQ)
return RREG32_NO_KIQ(offset);
diff --git a/drivers/gpu/drm/amd/a

[PATCH] drm/amd/display: Use pr_debug in DM to prevent dmesg flooding

2021-03-30 Thread Victor Lu
[why]
Enabling drm.debug=0x4 can flood the dmesg due to prints on every cursor
update or page flip.

[how]
Define and use pr_debug macros instead of a few spammy DRM_DEBUG_*'s.

Signed-off-by: Victor Lu 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 20 +--
 .../drm/amd/display/include/logger_types.h|  3 +++
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 92cee957b424..04dbcbc7578d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -372,14 +372,14 @@ static void dm_pflip_high_irq(void *interrupt_params)
/* IRQ could occur when in initial stage */
/* TODO work and BO cleanup */
if (amdgpu_crtc == NULL) {
-   DRM_DEBUG_DRIVER("CRTC is null, returning.\n");
+   DC_LOG_PFLIP("CRTC is null, returning.\n");
return;
}
 
spin_lock_irqsave(_to_drm(adev)->event_lock, flags);
 
if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
-   DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d 
!=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p] \n",
+   DC_LOG_PFLIP("amdgpu_crtc->pflip_status = %d 
!=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p] \n",
 amdgpu_crtc->pflip_status,
 AMDGPU_FLIP_SUBMITTED,
 amdgpu_crtc->crtc_id,
@@ -450,9 +450,9 @@ static void dm_pflip_high_irq(void *interrupt_params)
amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
spin_unlock_irqrestore(_to_drm(adev)->event_lock, flags);
 
-   DRM_DEBUG_KMS("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp 
%d\n",
- amdgpu_crtc->crtc_id, amdgpu_crtc,
- vrr_active, (int) !e);
+   DC_LOG_PFLIP("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp 
%d\n",
+amdgpu_crtc->crtc_id, amdgpu_crtc,
+vrr_active, (int) !e);
 }
 
 static void dm_vupdate_high_irq(void *interrupt_params)
@@ -482,7 +482,7 @@ static void dm_vupdate_high_irq(void *interrupt_params)
atomic64_set(_params->previous_timestamp, 
vblank->time);
}
 
-   DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d\n",
+   DC_LOG_VBLANK("crtc:%d, vupdate-vrr:%d\n",
  acrtc->crtc_id,
  vrr_active);
 
@@ -535,7 +535,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
 
vrr_active = amdgpu_dm_vrr_active_irq(acrtc);
 
-   DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,
+   DC_LOG_VBLANK("crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,
  vrr_active, acrtc->dm_irq_params.active_planes);
 
/**
@@ -7991,7 +7991,7 @@ static void handle_cursor_update(struct drm_plane *plane,
if (!plane->state->fb && !old_plane_state->fb)
return;
 
-   DRM_DEBUG_KMS("%s: crtc_id=%d with size %d to %d\n",
+   DC_LOG_CURSOR("%s: crtc_id=%d with size %d to %d\n",
  __func__,
  amdgpu_crtc->crtc_id,
  plane->state->crtc_w,
@@ -8053,8 +8053,8 @@ static void prepare_flip_isr(struct amdgpu_crtc *acrtc)
/* Mark this event as consumed */
acrtc->base.state->event = NULL;
 
-   DRM_DEBUG_KMS("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",
- acrtc->crtc_id);
+   DC_LOG_PFLIP("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",
+acrtc->crtc_id);
 }
 
 static void update_freesync_state_on_stream(
diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h 
b/drivers/gpu/drm/amd/display/include/logger_types.h
index 21bbee17c527..571fcf23cea9 100644
--- a/drivers/gpu/drm/amd/display/include/logger_types.h
+++ b/drivers/gpu/drm/amd/display/include/logger_types.h
@@ -36,6 +36,9 @@
 #define DC_LOG_DC(...) DRM_DEBUG_KMS(__VA_ARGS__)
 #define DC_LOG_DTN(...) DRM_DEBUG_KMS(__VA_ARGS__)
 #define DC_LOG_SURFACE(...) pr_debug("[SURFACE]:"__VA_ARGS__)
+#define DC_LOG_CURSOR(...) pr_debug("[CURSOR]:"__VA_ARGS__)
+#define DC_LOG_PFLIP(...) pr_debug("[PFLIP]:"__VA_ARGS__)
+#define DC_LOG_VBLANK(...) pr_debug("[VBLANK]:"__VA_ARGS__)
 #define DC_LOG_HW_HOTPLUG(...) DRM_DEBUG_KMS(__VA_ARGS__)
 #define DC_LOG_HW_LINK_TRAINING(...) pr_debug("[HW_LINK_TRAINING]:"__VA_ARGS__)
 #define DC_LOG_HW_SET_MODE(...) DRM_DEBUG_KMS(__VA_ARGS__)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: Free local data after use

2021-03-11 Thread Victor Lu
Fixes the following memory leak in dc_link_construct():

unreferenced object 0xa03e81471400 (size 1024):
comm "amd_module_load", pid 2486, jiffies 4294946026 (age 10.544s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
backtrace:
[<0bdf5c4a>] kmem_cache_alloc_trace+0x30a/0x4a0
[<e7c59f0e>] link_create+0xce/0xac0 [amdgpu]
[<2fb6c072>] dc_create+0x370/0x720 [amdgpu]
[<0094d1f3>] amdgpu_dm_init+0x18e/0x17a0 [amdgpu]
[<bec048fd>] dm_hw_init+0x12/0x20 [amdgpu]
[<a2bb7cf6>] amdgpu_device_init+0x1463/0x1e60 [amdgpu]
[<32d3bb13>] amdgpu_driver_load_kms+0x5b/0x330 [amdgpu]
[<a27834f9>] amdgpu_pci_probe+0x192/0x280 [amdgpu]
[<fec7d291>] local_pci_probe+0x47/0xa0
[<55dbbfa7>] pci_device_probe+0xe3/0x180
[<815da970>] really_probe+0x1c4/0x4e0
[<b4b6974b>] driver_probe_device+0x62/0x150
[<0f9ecc61>] device_driver_attach+0x58/0x60
[<0f65c843>] __driver_attach+0xd6/0x150
[<2f5e3683>] bus_for_each_dev+0x6a/0xc0
[<a1cfc897>] driver_attach+0x1e/0x20

Fixes: a8e30005b47a ("drm/amd/display/dc/core/dc_link: Move some local
data from the stack to the heap")
Signed-off-by: Victor Lu 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 9337e87a73e7..b60e0a4dc4bc 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -1615,6 +1615,7 @@ static bool dc_link_construct(struct dc_link *link,
link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
 
DC_LOG_DC("BIOS object table - %s finished successfully.\n", __func__);
+   kfree(info);
return true;
 device_tag_fail:
link->link_enc->funcs->destroy(>link_enc);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx