[PATCH 1/2] drm/amdgpu: refactor RLCG access path part 1

2020-03-10 Thread Monk Liu
what changed:
1) provide a new implementation interface for the RLCG access path
2) put SQ_CMD/SQ_IND_INDEX/SQ_IND_DATA into the GFX9 RLCG path to align with
the SRIOV RLCG logic

background:
we want to clean up the code path for WREG32_RLC so that it is only covered
and handled by the amdgpu_mm_wreg() routine; this way RLCG can serve the
register access even when it comes through UMR (via the debugfs interface).
The current implementation cannot achieve that goal because it relies on
hardcoded call sites everywhere, while UMR only passes an "offset" variable
to the driver.
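
As background for where this is heading (the actual wiring lands in part 2),
here is a minimal sketch of how the generic MMIO write routine could dispatch
to the new callbacks. This is illustrative only; the guard conditions and the
helper name below are assumptions, not code from this series:

/* sketch: route a write through RLCG when the register is whitelisted
 * and we are a SRIOV VF outside exclusive-access runtime */
static void amdgpu_mm_wreg_sketch(struct amdgpu_device *adev,
				  uint32_t reg, uint32_t v)
{
	const struct amdgpu_rlc_funcs *funcs = adev->gfx.rlc.funcs;

	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_runtime(adev) &&
	    funcs && funcs->is_rlcg_access_range &&
	    funcs->is_rlcg_access_range(adev, reg)) {
		/* let the RLC firmware program the register for us */
		funcs->rlcg_wreg(adev, reg, v);
		return;
	}

	writel(v, adev->rmmio + (reg << 2));
}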

tested-by: Monk Liu 
tested-by: Zhou pengju 
Signed-off-by: Zhou pengju 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  |  80 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 177 +++-
 drivers/gpu/drm/amd/amdgpu/soc15.h  |   7 ++
 4 files changed, 264 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 52509c2..60bb3e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -127,6 +127,8 @@ struct amdgpu_rlc_funcs {
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
+   void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v);
+   bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
 };
 
 struct amdgpu_rlc {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 82ef08d..3222cd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -224,6 +224,56 @@ static const struct soc15_reg_golden 
golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x, 0x0080)
 };
 
+static const struct soc15_reg_rlcg rlcg_access_gc_10_0[] = {
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_HI)},
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_LO)},
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_LENGTH)},
+   {SOC15_REG_ENTRY(GC, 0, mmCP_ME_CNTL)},
+};
+
+static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
+{
+   static void *scratch_reg0;
+   static void *scratch_reg1;
+   static void *scratch_reg2;
+   static void *scratch_reg3;
+   static void *spare_int;
+   static uint32_t grbm_cntl;
+   static uint32_t grbm_idx;
+   uint32_t i = 0;
+   uint32_t retries = 5;
+
+   scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
+   scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
+
+   grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + 
mmGRBM_GFX_CNTL;
+   grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + 
mmGRBM_GFX_INDEX;
+
+   if (amdgpu_sriov_runtime(adev)) {
+   pr_err("shoudn't call rlcg write register during runtime\n");
+   return;
+   }
+
+   writel(v, scratch_reg0);
+   writel(offset | 0x8000, scratch_reg1);
+   writel(1, spare_int);
+   for (i = 0; i < retries; i++) {
+   u32 tmp;
+
+   tmp = readl(scratch_reg1);
+   if (!(tmp & 0x8000))
+   break;
+
+   udelay(10);
+   }
+
+   if (i >= retries)
+   pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
+}
+
 static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
 {
/* Pending on emulation bring up */
@@ -4247,6 +4297,32 @@ static void gfx_v10_0_update_spm_vmid(struct 
amdgpu_device *adev, unsigned vmid)
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
 }
 
+static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
+   uint32_t offset,
+   struct soc15_reg_rlcg *entries, int 
arr_size)
+{
+   int i;
+   uint32_t reg;
+
+   for (i = 0; i < arr_size; i++) {
+   const struct soc15_reg_rlcg *entry;
+
+   entry = &entries[i];
+   reg = 
adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+   if (offset == reg)
+   return true;
+   }
+
+   return false;
+}
+
+static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 
offset)
+{
+   return gfx_v10_0_check_rlcg_range(adev, offset,
+

[PATCH 2/2] drm/amdgpu: refactor RLCG access path part 2

2020-03-10 Thread Monk Liu
switch to the new RLCG access path and drop the legacy
WREG32_RLC macros
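
As a before/after sketch of what this means at a typical call site (taken from
the hunks below, shown here only for orientation):

	/* before: the caller had to pick the RLCG-aware macro explicitly */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);

	/* after: a plain write; the common wreg path decides whether RLCG
	 * needs to program the register */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);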

tested-by: Monk Liu 
tested-by: Zhou pengju 
Signed-off-by: Zhou pengju 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  30 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   5 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|   8 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 104 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c  |  28 +++---
 drivers/gpu/drm/amd/amdgpu/soc15.c|  11 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  57 
 8 files changed, 93 insertions(+), 152 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2..a21f005 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -105,8 +105,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev 
*kgd, uint32_t vmid,
 
lock_srbm(kgd, 0, 0, 0, vmid);
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
 
unlock_srbm(kgd);
@@ -242,13 +242,13 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
 
for (reg = hqd_base;
 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32(reg, mqd_hqd[reg - hqd_base]);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -277,25 +277,25 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
   lower_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
   upper_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
   lower_32_bits((uintptr_t)wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
   upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+  get_queue_mask(adev, pipe_id, queue_id));
}
 
/* Start the EOP fetcher */
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
   REG_SET_FIELD(m->cp_hqd_eop_rptr,
 CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
 
release_queue(kgd);
 
@@ -547,7 +547,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
acquire_queue(kgd, pipe_id, queue_id);
 
if (m->cp_hqd_vmid == 0)
-   WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+   WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
 
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -561,7 +561,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
break;
}
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
 
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
@@ -656,7 +656,7 @@ int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
 
	mutex_lock(&adev->grbm_idx_mutex);
 
-   WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, 

RE: [PATCH] drm/amdgpu: check GFX RAS capability before reset counters

2020-03-10 Thread Liu, Monk
Reviewed-by: Monk Liu 

_
Monk Liu|GPU Virtualization Team |AMD


-Original Message-
From: amd-gfx  On Behalf Of Hawking Zhang
Sent: Wednesday, March 11, 2020 1:53 PM
To: amd-gfx@lists.freedesktop.org; Chen, Guchun ; Zhou1, 
Tao ; Clements, John ; Li, Dennis 

Cc: Zhang, Hawking 
Subject: [PATCH] drm/amdgpu: check GFX RAS capability before reset counters

disallow this logic from being enabled on platforms that don't support gfx ras
at this stage, like sriov skus, dgpu with legacy ras, etc.

Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++  
drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index efd52bcf8785..dd8e68fdbd90 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -6328,6 +6328,9 @@ static void gfx_v9_0_reset_ras_error_count(struct 
amdgpu_device *adev)  {
int i, j, k;
 
+   if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+   return;
+
/* read back registers to clear the counters */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
index 17f1e7b69a60..cceb46faf212 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -897,6 +897,9 @@ void gfx_v9_4_reset_ras_error_count(struct amdgpu_device 
*adev)  {
int i, j, k;
 
+   if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+   return;
+
	mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) {
for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) {
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: check GFX RAS capability before reset counters

2020-03-10 Thread Hawking Zhang
disallow this logic from being enabled on platforms that
don't support gfx ras at this stage, like sriov skus,
dgpu with legacy ras, etc.

Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index efd52bcf8785..dd8e68fdbd90 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -6328,6 +6328,9 @@ static void gfx_v9_0_reset_ras_error_count(struct 
amdgpu_device *adev)
 {
int i, j, k;
 
+   if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+   return;
+
/* read back registers to clear the counters */
	mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
index 17f1e7b69a60..cceb46faf212 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -897,6 +897,9 @@ void gfx_v9_4_reset_ras_error_count(struct amdgpu_device 
*adev)
 {
int i, j, k;
 
+   if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+   return;
+
	mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) {
for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: update ras support capability with different sram ecc configuration

2020-03-10 Thread Chen, Guchun
[AMD Public Use]

Hi Hawking,

Thanks for your suggestion.
Feedback inline.

Regards,
Guchun


_
From: Zhang, Hawking 
Sent: Wednesday, March 11, 2020 10:33 AM
To: Chen, Guchun ; amd-gfx@lists.freedesktop.org; Li, 
Dennis ; Zhou1, Tao ; Clements, John 

Subject: RE: [PATCH] drm/amdgpu: update ras support capability with different 
sram ecc configuration


[AMD Official Use Only - Internal Distribution Only]

 Oops, update the format to make it more readable.

1.  Disallow sriov guest/vf driver.
   [Guchun] Current code has it.

2.  Only include ASIC families that have server skus
   [Guchun] Currently we only allow the Vega20 and Arcturus asics for ras init.

3.  disable all the IP block RAS if amdgpu_ras_enable == 0
   [Guchun] This patch includes this logic. When amdgpu_ras_enable is 0,
ras->supported will be set to 0, and no IP block will init its ras code.

4.  Check HBM ECC flag
a.  explicitly inform users on the availability of this capability
b.  if HBM ECC is not supported, disable UMC/DF RAS in amdgpu_ras_mask
   [Guchun] This is missed. I will update to cover this.

5.  Check SRAM ECC flag
a.  explicitly inform users on the availability of this capability
b.  if SRAM ECC flag is not supported, disable other IP blocks in
amdgpu_ras_mask
   [Guchun] The current patch has done this.

6.  Remove the redundant RAS atombios query in gmc_v9_0_late_init for
VEGA20/ARCTURUS
a.  for Vega10 (legacy RAS), we have to keep informing users of the RAS
capability and apply the DF workaround
b.  we can try to merge vega10 as well, but that can be a next step.
   [Guchun] Sure. I will send another patch to achieve this.

Regards,
Hawking
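
As a rough illustration of the ordering discussed above, a sketch of how
amdgpu_ras_check_supported() could be laid out. The exact masks, helper usage
and messages here are assumptions for illustration, not the final patch:

static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
				       uint32_t *hw_supported, uint32_t *supported)
{
	*hw_supported = 0;
	*supported = 0;

	/* 1) no sriov guest/vf, 2) only ASIC families with server skus */
	if (amdgpu_sriov_vf(adev) ||
	    (adev->asic_type != CHIP_VEGA20 && adev->asic_type != CHIP_ARCTURUS))
		return;

	/* 4) HBM ECC flag: inform the user, gate UMC/DF RAS on it */
	if (amdgpu_atomfirmware_mem_ecc_supported(adev))
		*hw_supported |= 1 << AMDGPU_RAS_BLOCK__UMC |
				 1 << AMDGPU_RAS_BLOCK__DF;
	else
		DRM_INFO("HBM ECC is not supported.\n");

	/* 5) SRAM ECC flag: inform the user, gate the remaining IP blocks */
	if (amdgpu_atomfirmware_sram_ecc_supported(adev))
		*hw_supported |= AMDGPU_RAS_BLOCK_MASK &
				 ~(1 << AMDGPU_RAS_BLOCK__UMC |
				   1 << AMDGPU_RAS_BLOCK__DF);
	else
		DRM_INFO("SRAM ECC is not supported.\n");

	/* 3) amdgpu_ras_enable == 0 disables everything; then honor the
	 * module parameter mask */
	*supported = amdgpu_ras_enable == 0 ? 0 :
		     *hw_supported & amdgpu_ras_mask;
}

Point 6 (dropping the duplicate atombios query in gmc_v9_0_late_init) would be
a separate cleanup on top of this.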
-Original Message-
From: Zhang, Hawking mailto:hawking.zh...@amd.com>>
Sent: Wednesday, March 11, 2020 10:31
To: Zhang, Hawking mailto:hawking.zh...@amd.com>>; Chen, 
Guchun mailto:guchun.c...@amd.com>>; 
amd-gfx@lists.freedesktop.org; Li, Dennis 
mailto:dennis...@amd.com>>; Zhou1, Tao 
mailto:tao.zh...@amd.com>>; Clements, John 
mailto:john.cleme...@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: update ras support capability with different 
sram ecc configuration

[AMD Official Use Only - Internal Distribution Only]

Add one more check.

1). Disallow sriov guest/vf driver.
2). Only include ASIC families that has server skus 3). disable all the IP 
block RAS if amdgpu_ras_enable == 0 4). Check HBM ECC flag
a). explicitly inform users on the availability of this capability
b). if HBM ECC is not supported, disable UMC/DF RAS in amdgpu_ras_mask 
5). Check SRAM ECC flag
a). explicitly inform users on the availability of this capability
b). if SRAM ECC flag is not supported, disable other IP Blocks in 
amdgpu_ras_mask 6). Remove the redundant RAS atombios query in 
gmc_v9_0_late_init for VEGA20/ARCTURUS
a). for Vega10 (legacy RAS), we have to keep inform user on RAS 
capability and apply DF workaround
b). we can try to merge vega10 as well but that can be next step.

Regards,
Hawking

-Original Message-
From: amd-gfx 
mailto:amd-gfx-boun...@lists.freedesktop.org>>
 On Behalf Of Zhang, Hawking
Sent: Wednesday, March 11, 2020 10:29
To: Chen, Guchun mailto:guchun.c...@amd.com>>; 
amd-gfx@lists.freedesktop.org; Li, Dennis 
mailto:dennis...@amd.com>>; Zhou1, Tao 
mailto:tao.zh...@amd.com>>; Clements, John 
mailto:john.cleme...@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: update ras support capability with different 
sram ecc configuration

[AMD Official Use Only - Internal Distribution Only]

Hi Guchun,

I would suggest we organized the amdgpu_ras_check_supported in following logic

1). Disallow sriov guest/vf driver.
2). Only include ASIC families that has server skus 3). Check HBM ECC flag
a). explicitly inform users on the availability of this capability
b). if HBM ECC is not supported, disable UMC/DF RAS in amdgpu_ras_mask 
4). Check SRAM ECC flag
a). explicitly inform users on the availability of this capability
b). if SRAM ECC flag is not supported, disable other IP Blocks in 
amdgpu_ras_mask 5). Remove the redundant RAS atombios query in 
gmc_v9_0_late_init for VEGA20/ARCTURUS
a). for Vega10 (legacy RAS), we have to keep inform user on RAS 
capability and apply DF workaround
b). we can try to merge vega10 as well but that can be next step.

Regards,
Hawking

-Original Message-
From: Chen, Guchun mailto:guchun.c...@amd.com>>
Sent: Wednesday, March 11, 2020 09:57
To: amd-gfx@lists.freedesktop.org; Zhang, 
Hawking mailto:hawking.zh...@amd.com>>; Li, Dennis 
mailto:dennis...@amd.com>>; Zhou1, Tao 
mailto:tao.zh...@amd.com>>; Clements, John 
mailto:john.cleme...@amd.com>>
Cc: Chen, Guchun mailto:guchun.c...@amd.com>>
Subject: [PATCH] drm/amdgpu: update ras support capability with different sram 
ecc 

RE: [PATCH] drm/amdgpu: update ras support capability with different sram ecc configuration

2020-03-10 Thread Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]

 Oops, update the format to make it more readable.

1.  Disallow sriov guest/vf driver.
2.  Only include ASIC families that have server skus
3.  disable all the IP block RAS if amdgpu_ras_enable == 0
4.  Check HBM ECC flag
a.  explicitly inform users on the availability of this capability
b.  if HBM ECC is not supported, disable UMC/DF RAS in amdgpu_ras_mask
5.  Check SRAM ECC flag
a.  explicitly inform users on the availability of this capability
b.  if SRAM ECC flag is not supported, disable other IP Blocks in 
amdgpu_ras_mask
6.  Remove the redundant RAS atombios query in gmc_v9_0_late_init for 
VEGA20/ARCTURUS
a.  for Vega10 (legacy RAS), we have to keep informing users of the RAS
capability and apply the DF workaround
b.  we can try to merge vega10 as well, but that can be a next step.

Regards,
Hawking
-Original Message-
From: Zhang, Hawking 
Sent: Wednesday, March 11, 2020 10:31
To: Zhang, Hawking ; Chen, Guchun ; 
amd-gfx@lists.freedesktop.org; Li, Dennis ; Zhou1, Tao 
; Clements, John 
Subject: RE: [PATCH] drm/amdgpu: update ras support capability with different 
sram ecc configuration

[AMD Official Use Only - Internal Distribution Only]

Add one more check.

1). Disallow sriov guest/vf driver.
2). Only include ASIC families that has server skus 3). disable all the IP 
block RAS if amdgpu_ras_enable == 0 4). Check HBM ECC flag
a). explicitly inform users on the availability of this capability
b). if HBM ECC is not supported, disable UMC/DF RAS in amdgpu_ras_mask 
5). Check SRAM ECC flag
a). explicitly inform users on the availability of this capability
b). if SRAM ECC flag is not supported, disable other IP Blocks in 
amdgpu_ras_mask 6). Remove the redundant RAS atombios query in 
gmc_v9_0_late_init for VEGA20/ARCTURUS
a). for Vega10 (legacy RAS), we have to keep inform user on RAS 
capability and apply DF workaround
b). we can try to merge vega10 as well but that can be next step.

Regards,
Hawking

-Original Message-
From: amd-gfx 
mailto:amd-gfx-boun...@lists.freedesktop.org>>
 On Behalf Of Zhang, Hawking
Sent: Wednesday, March 11, 2020 10:29
To: Chen, Guchun mailto:guchun.c...@amd.com>>; 
amd-gfx@lists.freedesktop.org; Li, Dennis 
mailto:dennis...@amd.com>>; Zhou1, Tao 
mailto:tao.zh...@amd.com>>; Clements, John 
mailto:john.cleme...@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: update ras support capability with different 
sram ecc configuration

[AMD Official Use Only - Internal Distribution Only]

Hi Guchun,

I would suggest we organized the amdgpu_ras_check_supported in following logic

1). Disallow sriov guest/vf driver.
2). Only include ASIC families that has server skus 3). Check HBM ECC flag
a). explicitly inform users on the availability of this capability
b). if HBM ECC is not supported, disable UMC/DF RAS in amdgpu_ras_mask 
4). Check SRAM ECC flag
a). explicitly inform users on the availability of this capability
b). if SRAM ECC flag is not supported, disable other IP Blocks in 
amdgpu_ras_mask 5). Remove the redundant RAS atombios query in 
gmc_v9_0_late_init for VEGA20/ARCTURUS
a). for Vega10 (legacy RAS), we have to keep inform user on RAS 
capability and apply DF workaround
b). we can try to merge vega10 as well but that can be next step.

Regards,
Hawking

-Original Message-
From: Chen, Guchun mailto:guchun.c...@amd.com>>
Sent: Wednesday, March 11, 2020 09:57
To: amd-gfx@lists.freedesktop.org; Zhang, 
Hawking mailto:hawking.zh...@amd.com>>; Li, Dennis 
mailto:dennis...@amd.com>>; Zhou1, Tao 
mailto:tao.zh...@amd.com>>; Clements, John 
mailto:john.cleme...@amd.com>>
Cc: Chen, Guchun mailto:guchun.c...@amd.com>>
Subject: [PATCH] drm/amdgpu: update ras support capability with different sram 
ecc configuration

When sram ecc is disabled by vbios, ras initialization process in the 
corrresponding IPs that suppport sram ecc needs to be skipped. So update ras 
support capability accordingly on top of this configuration. This capability 
will block further ras operations to the unsupported IPs.

Signed-off-by: Guchun Chen mailto:guchun.c...@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 69b02b9d4131..79be004378fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1748,8 +1748,23 @@ static void amdgpu_ras_check_supported(struct 
amdgpu_device *adev,
 amdgpu_atomfirmware_sram_ecc_supported(adev)))
*hw_supported = AMDGPU_RAS_BLOCK_MASK;

-   *supported = amdgpu_ras_enable == 0 ?
-   0 : *hw_supported & 

RE: [PATCH] drm/amdgpu: update ras support capability with different sram ecc configuration

2020-03-10 Thread Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]

Add one more check.

1). Disallow sriov guest/vf driver.
2). Only include ASIC families that have server skus
3). disable all the IP block RAS if amdgpu_ras_enable == 0
4). Check HBM ECC flag
a). explicitly inform users on the availability of this capability
b). if HBM ECC is not supported, disable UMC/DF RAS in amdgpu_ras_mask
5). Check SRAM ECC flag
a). explicitly inform users on the availability of this capability
b). if SRAM ECC flag is not supported, disable other IP Blocks in 
amdgpu_ras_mask
6). Remove the redundant RAS atombios query in gmc_v9_0_late_init for 
VEGA20/ARCTURUS
a). for Vega10 (legacy RAS), we have to keep informing users of the RAS
capability and apply the DF workaround
b). we can try to merge vega10 as well, but that can be a next step.

Regards,
Hawking

-Original Message-
From: amd-gfx  On Behalf Of Zhang, 
Hawking
Sent: Wednesday, March 11, 2020 10:29
To: Chen, Guchun ; amd-gfx@lists.freedesktop.org; Li, 
Dennis ; Zhou1, Tao ; Clements, John 

Subject: RE: [PATCH] drm/amdgpu: update ras support capability with different 
sram ecc configuration

[AMD Official Use Only - Internal Distribution Only]

Hi Guchun,

I would suggest we organized the amdgpu_ras_check_supported in following logic

1). Disallow sriov guest/vf driver.
2). Only include ASIC families that has server skus 3). Check HBM ECC flag
a). explicitly inform users on the availability of this capability
b). if HBM ECC is not supported, disable UMC/DF RAS in amdgpu_ras_mask 
4). Check SRAM ECC flag
a). explicitly inform users on the availability of this capability
b). if SRAM ECC flag is not supported, disable other IP Blocks in 
amdgpu_ras_mask 5). Remove the redundant RAS atombios query in 
gmc_v9_0_late_init for VEGA20/ARCTURUS
a). for Vega10 (legacy RAS), we have to keep inform user on RAS 
capability and apply DF workaround
b). we can try to merge vega10 as well but that can be next step.

Regards,
Hawking

-Original Message-
From: Chen, Guchun 
Sent: Wednesday, March 11, 2020 09:57
To: amd-gfx@lists.freedesktop.org; Zhang, Hawking ; Li, 
Dennis ; Zhou1, Tao ; Clements, John 

Cc: Chen, Guchun 
Subject: [PATCH] drm/amdgpu: update ras support capability with different sram 
ecc configuration

When sram ecc is disabled by vbios, ras initialization process in the 
corrresponding IPs that suppport sram ecc needs to be skipped. So update ras 
support capability accordingly on top of this configuration. This capability 
will block further ras operations to the unsupported IPs.

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 69b02b9d4131..79be004378fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1748,8 +1748,23 @@ static void amdgpu_ras_check_supported(struct 
amdgpu_device *adev,
 amdgpu_atomfirmware_sram_ecc_supported(adev)))
*hw_supported = AMDGPU_RAS_BLOCK_MASK;
 
-   *supported = amdgpu_ras_enable == 0 ?
-   0 : *hw_supported & amdgpu_ras_mask;
+   if (amdgpu_ras_enable == 0)
+   *supported = 0;
+   else {
+   *supported = *hw_supported;
+   /*
+* When sram ecc is disabled in vbios, bypass those IP
+* blocks that support sram ecc, and only hold UMC and DF.
+*/
+   if (!amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+   DRM_INFO("Bypass IPs that support sram ecc.\n");
+   *supported &= (1 << AMDGPU_RAS_BLOCK__UMC |
+   1 << AMDGPU_RAS_BLOCK__DF);
+   }
+
+   /* ras support needs to align with module parmeter */
+   *supported &= amdgpu_ras_mask;
+   }
 }
 
 int amdgpu_ras_init(struct amdgpu_device *adev)
--
2.17.1
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: update ras support capability with different sram ecc configuration

2020-03-10 Thread Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]

Hi Guchun,

I would suggest we organize amdgpu_ras_check_supported in the following logic

1). Disallow sriov guest/vf driver.
2). Only include ASIC families that have server skus
3). Check HBM ECC flag
a). explicitly inform users on the availability of this capability
b). if HBM ECC is not supported, disable UMC/DF RAS in amdgpu_ras_mask
4). Check SRAM ECC flag
a). explicitly inform users on the availability of this capability
b). if SRAM ECC flag is not supported, disable other IP Blocks in 
amdgpu_ras_mask
5). Remove the redundant RAS atombios query in gmc_v9_0_late_init for 
VEGA20/ARCTURUS
a). for Vega10 (legacy RAS), we have to keep informing users of the RAS
capability and apply the DF workaround
b). we can try to merge vega10 as well, but that can be a next step.

Regards,
Hawking

-Original Message-
From: Chen, Guchun  
Sent: Wednesday, March 11, 2020 09:57
To: amd-gfx@lists.freedesktop.org; Zhang, Hawking ; Li, 
Dennis ; Zhou1, Tao ; Clements, John 

Cc: Chen, Guchun 
Subject: [PATCH] drm/amdgpu: update ras support capability with different sram 
ecc configuration

When sram ecc is disabled by vbios, ras initialization process in the 
corrresponding IPs that suppport sram ecc needs to be skipped. So update ras 
support capability accordingly on top of this configuration. This capability 
will block further ras operations to the unsupported IPs.

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 69b02b9d4131..79be004378fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1748,8 +1748,23 @@ static void amdgpu_ras_check_supported(struct 
amdgpu_device *adev,
 amdgpu_atomfirmware_sram_ecc_supported(adev)))
*hw_supported = AMDGPU_RAS_BLOCK_MASK;
 
-   *supported = amdgpu_ras_enable == 0 ?
-   0 : *hw_supported & amdgpu_ras_mask;
+   if (amdgpu_ras_enable == 0)
+   *supported = 0;
+   else {
+   *supported = *hw_supported;
+   /*
+* When sram ecc is disabled in vbios, bypass those IP
+* blocks that support sram ecc, and only hold UMC and DF.
+*/
+   if (!amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+   DRM_INFO("Bypass IPs that support sram ecc.\n");
+   *supported &= (1 << AMDGPU_RAS_BLOCK__UMC |
+   1 << AMDGPU_RAS_BLOCK__DF);
+   }
+
+   /* ras support needs to align with module parmeter */
+   *supported &= amdgpu_ras_mask;
+   }
 }
 
 int amdgpu_ras_init(struct amdgpu_device *adev)
--
2.17.1
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: update ras support capability with different sram ecc configuration

2020-03-10 Thread Guchun Chen
When sram ecc is disabled by vbios, the ras initialization
process in the corresponding IPs that support sram ecc
needs to be skipped. So update the ras support capability
accordingly on top of this configuration. This capability
will block further ras operations to the unsupported IPs.

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 69b02b9d4131..79be004378fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1748,8 +1748,23 @@ static void amdgpu_ras_check_supported(struct 
amdgpu_device *adev,
 amdgpu_atomfirmware_sram_ecc_supported(adev)))
*hw_supported = AMDGPU_RAS_BLOCK_MASK;
 
-   *supported = amdgpu_ras_enable == 0 ?
-   0 : *hw_supported & amdgpu_ras_mask;
+   if (amdgpu_ras_enable == 0)
+   *supported = 0;
+   else {
+   *supported = *hw_supported;
+   /*
+* When sram ecc is disabled in vbios, bypass those IP
+* blocks that support sram ecc, and only hold UMC and DF.
+*/
+   if (!amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+   DRM_INFO("Bypass IPs that support sram ecc.\n");
+   *supported &= (1 << AMDGPU_RAS_BLOCK__UMC |
+   1 << AMDGPU_RAS_BLOCK__DF);
+   }
+
+   /* ras support needs to align with module parameter */
+   *supported &= amdgpu_ras_mask;
+   }
 }
 
 int amdgpu_ras_init(struct amdgpu_device *adev)
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu: call ras_debugfs_create_all in debugfs_init

2020-03-10 Thread Yang, Stanley
[AMD Official Use Only - Internal Distribution Only]

Hi Alex,

I will send another patch to make this change, because this patch has already
been pushed to the branch.

Regards,
Stanley

-Original Message-
From: Alex Deucher  
Sent: Tuesday, March 10, 2020 9:23 PM
To: Yang, Stanley 
Cc: amd-gfx list ; Chen, Guchun 
; Zhou1, Tao ; Clements, John 
; Li, Dennis ; Zhang, Hawking 

Subject: Re: [PATCH 2/2] drm/amdgpu: call ras_debugfs_create_all in debugfs_init

On Mon, Mar 9, 2020 at 5:12 AM Stanley.Yang  wrote:
>
> From: Tao Zhou 
>
> and remove each ras IP's own debugfs creation
>
> Signed-off-by: Tao Zhou 
> Signed-off-by: Stanley.Yang 
> Change-Id: If3d16862afa0d97abad183dd6e60478b34029e95
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 3 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c   | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c| 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c| 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c| 1 -
>  8 files changed, 3 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index deca6343d0cc..1d513e4f9934 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1315,6 +1315,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,  
> DEFINE_SIMPLE_ATTRIBUTE(fops_sclk_set, NULL,
> amdgpu_debugfs_sclk_set, "%llu\n");
>
> +extern void amdgpu_ras_debugfs_create_all(struct amdgpu_device 
> +*adev);

Can we put this in amdgpu_ras.h and include that instead?
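
Roughly, that suggestion would look like this (sketch only):

	/* amdgpu_ras.h */
	void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);

	/* amdgpu_debugfs.c */
	#include "amdgpu_ras.h"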


>  int amdgpu_debugfs_init(struct amdgpu_device *adev)  {
> int r, i;
> @@ -1387,6 +1388,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
> }
> }
>
> +   amdgpu_ras_debugfs_create_all(adev);
> +
> return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
> 
> ARRAY_SIZE(amdgpu_debugfs_list));  } diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 7403588684b3..d12bb4a35780 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -565,7 +565,6 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
> int r;
> struct ras_fs_if fs_info = {
> .sysfs_name = "gfx_err_count",
> -   .debugfs_name = "gfx_err_inject",
> };
> struct ras_ih_if ih_info = {
> .cb = amdgpu_gfx_process_ras_data_cb, diff --git 
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
> index 676c48c02d77..ead3dc572ec5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
> @@ -32,7 +32,6 @@ int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev)
> };
> struct ras_fs_if fs_info = {
> .sysfs_name = "mmhub_err_count",
> -   .debugfs_name = "mmhub_err_inject",
> };
>
> if (!adev->mmhub.ras_if) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
> index 7d5c3a9de9ea..6201a5f4b4fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
> @@ -30,7 +30,6 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev)
> };
> struct ras_fs_if fs_info = {
> .sysfs_name = "pcie_bif_err_count",
> -   .debugfs_name = "pcie_bif_err_inject",
> };
>
> if (!adev->nbio.ras_if) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 422cdd1ce3ad..57af4ea8fb58 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1178,7 +1178,6 @@ static void amdgpu_ras_debugfs_remove_all(struct 
> amdgpu_device *adev)  static int amdgpu_ras_fs_init(struct 
> amdgpu_device *adev)  {
> amdgpu_ras_sysfs_create_feature_node(adev);
> -   amdgpu_ras_debugfs_create_ctrl_node(adev);
>
> return 0;
>  }
> @@ -1882,8 +1881,6 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
> goto interrupt;
> }
>
> -   amdgpu_ras_debugfs_create(adev, fs_info);
> -
> r = amdgpu_ras_sysfs_create(adev, fs_info);
> if (r)
> goto sysfs;
> @@ -1892,7 +1889,6 @@ int amdgpu_ras_late_init(struct amdgpu_device 
> *adev,
>  cleanup:
> amdgpu_ras_sysfs_remove(adev, ras_block);
>  sysfs:
> -   amdgpu_ras_debugfs_remove(adev, ras_block);
> if (ih_info->cb)
> amdgpu_ras_interrupt_remove_handler(adev, ih_info);
>  interrupt:
> @@ -1909,7 +1905,6 @@ void 

Re: [PATCH] drm/amdgpu/sriov refine vcn_v2_5_early_init func

2020-03-10 Thread Alex Deucher
On Tue, Mar 10, 2020 at 8:48 AM Jack Zhang  wrote:
>
> refine the assignment for vcn.num_vcn_inst,
> vcn.harvest_config, vcn.num_enc_rings in VF
>
> Signed-off-by: Jack Zhang 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 35 
> ++-
>  1 file changed, 18 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 
> b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> index 2d64ba1..9b22e2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> @@ -74,29 +74,30 @@ static int amdgpu_ih_clientid_vcns[] = {
>  static int vcn_v2_5_early_init(void *handle)
>  {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -   if (adev->asic_type == CHIP_ARCTURUS) {
> -   u32 harvest;
> -   int i;
> -
> -   adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS;
> -   for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> -   harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING);
> -   if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
> -   adev->vcn.harvest_config |= 1 << i;
> -   }
> -
> -   if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
> -AMDGPU_VCN_HARVEST_VCN1))
> -   /* both instances are harvested, disable the block */
> -   return -ENOENT;
> -   } else
> -   adev->vcn.num_vcn_inst = 1;
>
> if (amdgpu_sriov_vf(adev)) {
> adev->vcn.num_vcn_inst = 2;
> adev->vcn.harvest_config = 0;
> adev->vcn.num_enc_rings = 1;
> } else {
> +   if (adev->asic_type == CHIP_ARCTURUS) {
> +   u32 harvest;
> +   int i;
> +
> +   adev->vcn.num_vcn_inst = 
> VCN25_MAX_HW_INSTANCES_ARCTURUS;
> +   for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> +   harvest = RREG32_SOC15(UVD, i, 
> mmCC_UVD_HARVESTING);
> +   if (harvest & 
> CC_UVD_HARVESTING__UVD_DISABLE_MASK)
> +   adev->vcn.harvest_config |= 1 << i;
> +   }
> +
> +   if (adev->vcn.harvest_config == 
> (AMDGPU_VCN_HARVEST_VCN0 |
> +   AMDGPU_VCN_HARVEST_VCN1))
> +   /* both instances are harvested, disable the 
> block */
> +   return -ENOENT;
> +   } else
> +   adev->vcn.num_vcn_inst = 1;
> +
> adev->vcn.num_enc_rings = 2;
> }
>
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[pull] amdgpu, amdkfd, scheduler drm-next-5.7

2020-03-10 Thread Alex Deucher
Hi Dave, Daniel,

Updates for 5.7.

The following changes since commit 60347451ddb0646c1a9cc5b9581e5bcf648ad1aa:

  Merge tag 'drm-misc-next-2020-02-27' of 
git://anongit.freedesktop.org/drm/drm-misc into drm-next (2020-02-28 16:22:41 
+1000)

are available in the Git repository at:

  git://people.freedesktop.org/~agd5f/linux tags/amd-drm-next-5.7-2020-03-10

for you to fetch changes up to 5d11e37c021f925496a3a3c019cadf69435f65ed:

  drm/amdgpu/runpm: disable runpm on Vega10 (2020-03-10 15:55:18 -0400)


amd-drm-next-5.7-2020-03-10:

amdgpu:
- SR-IOV fixes
- Fix up fallout from drm load/unload callback removal
- Navi, renoir power management watermark fixes
- Refactor smu parameter handling
- Display FEC fixes
- Display DCC fixes
- HDCP fixes
- Add support for USB-C PD firmware updates
- Pollock detection fix
- Rework compute ring priority handling
- RAS fixes
- Misc cleanups

amdkfd:
- Consolidate more gfx config details in amdgpu
- Consolidate bo alloc flags
- Improve code comments
- SDMA MQD fixes
- Misc cleanups

gpu scheduler:
- Add support for modifying the sched list

uapi:
- Clarify comments about GEM_CREATE flags that are not used by userspace.
  The kernel driver has always prevented userspace from using these.
  They are only used internally in the kernel driver.


Aly-Tawfik (2):
  drm/amdgpu/display: fix pci revision id fetching
  drm/amdgpu/display: Fix Pollock Variant Detection

Andrey Grodzovsky (6):
  drm/amdgpu: Add USBC PD FW load interface to PSP.
  drm/amdgpu: Add USBC PD FW load to PSP 11
  drm/amdgpu: Add support for USBC PD FW download
  drm/amdgpu: Wrap clflush_cache_range with x86 ifdef
  drm/amdgpu: Fix GPU reset error.
  drm/amdgpu: Enter low power state if CRTC active.

Aric Cyr (2):
  drm/amd/display: 3.2.75
  drm/amd/display: 3.2.76

Bhawanpreet Lakha (1):
  drm/amd/display: Clear link settings on MST disable connector

Braden Bakker (1):
  drm/amd/display: Add registry for mem pwr control

Chengming Gui (2):
  drm/amdgpu: add lock option for smu_set_soft_freq_range()
  drm/amdgpu: Add debugfs interface to set arbitrary sclk for navi14 (v2)

Colin Ian King (2):
  drm/amdkfd: fix indentation issue
  drm/amd/display: fix indentation issue on a hunk of code

Dan Carpenter (1):
  drm/amd/display: Fix dmub_psr_destroy()

Emily Deng (1):
  drm/amdgpu/sriov: Use kiq to copy the gpu clock

Eric Bernstein (1):
  drm/amd/display: Fix default logger mask definition

Eric Huang (1):
  drm/amdkfd: change SDMA MQD memory type

Feifei Xu (1):
  drm/amdgpu/runpm: disable runpm on Vega10

Felix Kuehling (1):
  drm/amdkfd: Signal eviction fence on process destruction (v2)

George Shen (1):
  drm/amd/display: Workaround to do HDCP authentication twice on certain 
displays

Guchun Chen (1):
  drm/amdgpu: toggle DF-Cstate when accessing UMC ras error related 
registers

Hawking Zhang (10):
  drm/amdgpu: add reset_ras_error_count function for SDMA
  drm/amdgpu: add reset_ras_error_count function for MMHUB
  drm/amdgpu: add reset_ras_error_count function for GFX
  drm/amdgpu: add reset_ras_error_count function for HDP
  drm/amdgpu: correct ROM_INDEX/DATA offset for VEGA20
  drm/amdgpu: add xgmi ip headers
  drm/amdgpu: add wafl2 ip headers
  drm/amdgpu: add helper funcs to detect PCS error
  drm/amdgpu: enable PCS error report on VG20
  drm/amdgpu: enable PCS error report on arcturus

Hersen Wu (2):
  drm/amdgpu/powerplay: nv1x, renior copy dcn clock settings of watermark 
to smu during boot up
  drm/amdgpu/display: navi1x copy dcn watermark clock settings to smu 
resume from s3 (v2)

Isabel Zhang (2):
  drm/amd/display: Move mod_hdcp_displays to mod_hdcp struct
  drm/amd/display: Add stay count and bstatus to HDCP log

Jacob He (2):
  drm/amdgpu: Initialize SPM_VMID with 0xf (v2)
  drm/amdgpu: Update SPM_VMID with the job's vmid when application reserves 
the vmid

John Clements (2):
  drm/amdgpu: increase atombios cmd timeout
  drm/amdgpu: update page retirement sequence

Joseph Gravenor (1):
  drm/amd/display: add worst case dcc meta pitch to fake plane

Josip Pavic (1):
  drm/amd/display: fix dcc swath size calculations on dcn1

Martin Leung (3):
  drm/amd/display: Link training TPS1 workaround add back in dpcd
  drm/amd/display: update soc bb for nv14
  drm/amd/display: writing stereo polarity register if swapped

Matt Coffin (3):
  drm/amdgpu/powerplay: Refactor SMU message handling for safety
  drm/amdgpu/powerplay: Remove deprecated smc_read_arg
  drm/amdgpu/smu: Add message sending lock

Melissa Wen (3):
  drm/amd/display: dc_link: code clean up on enable_link_dp function
  drm/amd/display: dc_link: code clean up on detect_dp function
  drm/amd/display: 

Re: [PATCH v2 4/4] drm/amdgpu/vcn2.5: add sync when WPTR/RPTR reset

2020-03-10 Thread Leo Liu



On 2020-03-10 3:58 p.m., James Zhu wrote:

Add vcn hardware and firmware synchronization to fix a race condition
issue among the vcn driver, hardware and firmware

v2: WA: Add scratch 3 to sync with vcn firmware during W/R pointer reset

Signed-off-by: James Zhu 
---
  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 12 
  1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 2d64ba1..9480039 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -1034,6 +1034,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp);
  
+		/* Set scratch3 to start dec/enc registers reset */

+   WREG32_SOC15(UVD, i, mmUVD_SCRATCH3, 1);
+
/* programm the RB_BASE for ring buffer */
WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr));
@@ -1059,6 +1062,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, 
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+
+   /* Clear scratch3 to finish dec/enc registers reset */
+   WREG32_SOC15(UVD, i, mmUVD_SCRATCH3, 0);
}
  
  	return 0;

@@ -1388,8 +1394,11 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device 
*adev,
   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
   
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
  
+/* Stall DPG before WPTR/RPTR reset */

+   WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, 
mmUVD_POWER_STATUS), UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, 
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);


Can you wrap that line to be shorter? With that fixed, this patch is

Reviewed-by: Leo Liu 


Leo



/* Restore */
	ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+   ring->wptr = 0;
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, 
ring->gpu_addr);
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, 
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, 
ring->ring_size / 4);
@@ -1397,6 +1406,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device 
*adev,
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, 
lower_32_bits(ring->wptr));
  
	ring = &adev->vcn.inst[inst_idx].ring_enc[1];

+   ring->wptr = 0;
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, 
ring->gpu_addr);
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, 
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, 
ring->ring_size / 4);
@@ -1405,6 +1415,8 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device 
*adev,
  
  WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,

   RREG32_SOC15(UVD, inst_idx, 
mmUVD_SCRATCH2) & 0x7FFF);
+   /* Unstall DPG */
+   WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, 
mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
  
  SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS,

   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, 
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 4/4] drm/amdgpu/vcn2.5: add sync when WPTR/RPTR reset

2020-03-10 Thread James Zhu
Add vcn hardware and firmware synchronization to fix a race condition
issue among the vcn driver, hardware and firmware

v2: WA: Add scratch 3 to sync with vcn firmware during W/R pointer reset

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 2d64ba1..9480039 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -1034,6 +1034,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp);
 
+   /* Set scratch3 to start dec/enc registers reset */
+   WREG32_SOC15(UVD, i, mmUVD_SCRATCH3, 1);
+
/* programm the RB_BASE for ring buffer */
WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr));
@@ -1059,6 +1062,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, 
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+
+   /* Clear scratch3 to finish dec/enc registers reset */
+   WREG32_SOC15(UVD, i, mmUVD_SCRATCH3, 0);
}
 
return 0;
@@ -1388,8 +1394,11 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device 
*adev,
   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
   
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
 
+   /* Stall DPG before WPTR/RPTR reset */
+   WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, 
mmUVD_POWER_STATUS), UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, 
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
/* Restore */
	ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+   ring->wptr = 0;
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, 
ring->gpu_addr);
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, 
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, 
ring->ring_size / 4);
@@ -1397,6 +1406,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device 
*adev,
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, 
lower_32_bits(ring->wptr));
 
	ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+   ring->wptr = 0;
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, 
ring->gpu_addr);
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, 
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, 
ring->ring_size / 4);
@@ -1405,6 +1415,8 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device 
*adev,
 
WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
   RREG32_SOC15(UVD, inst_idx, 
mmUVD_SCRATCH2) & 0x7FFF);
+   /* Unstall DPG */
+   WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, 
mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
 
SOC15_WAIT_ON_RREG(UVD, inst_idx, 
mmUVD_POWER_STATUS,
   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, 
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu/sriov refine vcn_v2_5_early_init func

2020-03-10 Thread Zhang, Jack (Jian)
Ping...

-Original Message-
From: amd-gfx  On Behalf Of Jack Zhang
Sent: Tuesday, March 10, 2020 8:49 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Jack (Jian) ; Zhang, Jack (Jian) 

Subject: [PATCH] drm/amdgpu/sriov refine vcn_v2_5_early_init func

refine the assignment for vcn.num_vcn_inst, vcn.harvest_config, 
vcn.num_enc_rings in VF

Signed-off-by: Jack Zhang 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 35 ++-
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 2d64ba1..9b22e2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -74,29 +74,30 @@ static int amdgpu_ih_clientid_vcns[] = {  static int 
vcn_v2_5_early_init(void *handle)  {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   if (adev->asic_type == CHIP_ARCTURUS) {
-   u32 harvest;
-   int i;
-
-   adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS;
-   for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-   harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING);
-   if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
-   adev->vcn.harvest_config |= 1 << i;
-   }
-
-   if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
-AMDGPU_VCN_HARVEST_VCN1))
-   /* both instances are harvested, disable the block */
-   return -ENOENT;
-   } else
-   adev->vcn.num_vcn_inst = 1;
 
if (amdgpu_sriov_vf(adev)) {
adev->vcn.num_vcn_inst = 2;
adev->vcn.harvest_config = 0;
adev->vcn.num_enc_rings = 1;
} else {
+   if (adev->asic_type == CHIP_ARCTURUS) {
+   u32 harvest;
+   int i;
+
+   adev->vcn.num_vcn_inst = 
VCN25_MAX_HW_INSTANCES_ARCTURUS;
+   for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+   harvest = RREG32_SOC15(UVD, i, 
mmCC_UVD_HARVESTING);
+   if (harvest & 
CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+   adev->vcn.harvest_config |= 1 << i;
+   }
+
+   if (adev->vcn.harvest_config == 
(AMDGPU_VCN_HARVEST_VCN0 |
+   AMDGPU_VCN_HARVEST_VCN1))
+   /* both instances are harvested, disable the 
block */
+   return -ENOENT;
+   } else
+   adev->vcn.num_vcn_inst = 1;
+
adev->vcn.num_enc_rings = 2;
}
 
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: refactor RLCG access path part 1

2020-03-10 Thread Monk Liu
what changed:
1)provide new implementation interface for the rlcg access path
2)put SQ_CMD/SQ_IND_INDEX/SQ_IND_DATA to GFX9 RLCG path to align with
SRIOV RLCG logic

background:
we what to clear the code path for WREG32_RLC, to make it only covered
and handled by amdgpu_mm_wreg() routine, this way we can let RLCG
to serve the register access even through UMR (via debugfs interface)
the current implementation cannot achieve that goal because it can only
hardcode everywhere, but UMR only pass "offset" as varable to driver

tested-by: Monk Liu 
tested-by: Zhou pengju 
Signed-off-by: Zhou pengju 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  |  80 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 177 +++-
 drivers/gpu/drm/amd/amdgpu/soc15.h  |   7 ++
 4 files changed, 264 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 52509c2..60bb3e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -127,6 +127,8 @@ struct amdgpu_rlc_funcs {
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
+   void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v);
+   bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
 };
 
 struct amdgpu_rlc {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 82ef08d..3222cd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -224,6 +224,56 @@ static const struct soc15_reg_golden 
golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x, 0x0080)
 };
 
+static const struct soc15_reg_rlcg rlcg_access_gc_10_0[] = {
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_HI)},
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_ADDR_LO)},
+   {SOC15_REG_ENTRY(GC, 0, mmRLC_CSIB_LENGTH)},
+   {SOC15_REG_ENTRY(GC, 0, mmCP_ME_CNTL)},
+};
+
+static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
+{
+   static void *scratch_reg0;
+   static void *scratch_reg1;
+   static void *scratch_reg2;
+   static void *scratch_reg3;
+   static void *spare_int;
+   static uint32_t grbm_cntl;
+   static uint32_t grbm_idx;
+   uint32_t i = 0;
+   uint32_t retries = 5;
+
+   scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
+   scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
+
+   grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + 
mmGRBM_GFX_CNTL;
+   grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + 
mmGRBM_GFX_INDEX;
+
+   if (amdgpu_sriov_runtime(adev)) {
+   pr_err("shoudn't call rlcg write register during runtime\n");
+   return;
+   }
+
+   writel(v, scratch_reg0);
+   writel(offset | 0x8000, scratch_reg1);
+   writel(1, spare_int);
+   for (i = 0; i < retries; i++) {
+   u32 tmp;
+
+   tmp = readl(scratch_reg1);
+   if (!(tmp & 0x8000))
+   break;
+
+   udelay(10);
+   }
+
+   if (i >= retries)
+   pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
+}
+
 static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
 {
/* Pending on emulation bring up */
@@ -4247,6 +4297,32 @@ static void gfx_v10_0_update_spm_vmid(struct 
amdgpu_device *adev, unsigned vmid)
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
 }
 
+static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
+   uint32_t offset,
+   struct soc15_reg_rlcg *entries, int 
arr_size)
+{
+   int i;
+   uint32_t reg;
+
+   for (i = 0; i < arr_size; i++) {
+   const struct soc15_reg_rlcg *entry;
+
+   entry = [i];
+   reg = 
adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+   if (offset == reg)
+   return true;
+   }
+
+   return false;
+}
+
+static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 
offset)
+{
+   return gfx_v10_0_check_rlcg_range(adev, offset,
+

[PATCH 2/2] drm/amdgpu: refactor RLCG access path part 2

2020-03-10 Thread Monk Liu
switch to new RLCG access path, and drop the legacy
WREG32_RLC macros

tested-by: Monk Liu 
tested-by: Zhou pengju 
Signed-off-by: Zhou pengju 
Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  30 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   5 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c|   8 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 104 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c  |  28 +++---
 drivers/gpu/drm/amd/amdgpu/soc15.c|  11 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  57 
 8 files changed, 93 insertions(+), 152 deletions(-)
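
For reference, the amdgpu_device.c hook that makes this work is small (5 lines
in the diffstat above). A minimal sketch of the idea, using the callbacks
introduced in part 1 (the exact SR-IOV guard and the surrounding code in the
real hunk may differ):

/* Sketch only: let the generic wreg path hand RLCG-protected offsets to the
 * RLC callbacks from part 1, so debugfs/UMR writes going through
 * amdgpu_mm_wreg() take the RLCG path as well.
 */
static void amdgpu_mm_wreg_sketch(struct amdgpu_device *adev, u32 reg, u32 v)
{
	const struct amdgpu_rlc_funcs *funcs = adev->gfx.rlc.funcs;

	if (amdgpu_sriov_vf(adev) && funcs &&
	    funcs->is_rlcg_access_range &&
	    funcs->is_rlcg_access_range(adev, reg)) {
		funcs->rlcg_wreg(adev, reg, v);
		return;
	}

	writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
}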

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2..a21f005 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -105,8 +105,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev 
*kgd, uint32_t vmid,
 
lock_srbm(kgd, 0, 0, 0, vmid);
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
 
unlock_srbm(kgd);
@@ -242,13 +242,13 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
 
for (reg = hqd_base;
 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
-   WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+   WREG32(reg, mqd_hqd[reg - hqd_base]);
 
 
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
 
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -277,25 +277,25 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, 
uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
   lower_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
   upper_32_bits(guessed_wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
   lower_32_bits((uintptr_t)wptr));
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
   upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-  (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+  get_queue_mask(adev, pipe_id, queue_id));
}
 
/* Start the EOP fetcher */
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
   REG_SET_FIELD(m->cp_hqd_eop_rptr,
 CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
 
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
 
release_queue(kgd);
 
@@ -547,7 +547,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
acquire_queue(kgd, pipe_id, queue_id);
 
if (m->cp_hqd_vmid == 0)
-   WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+   WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
 
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -561,7 +561,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
break;
}
 
-   WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
 
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
@@ -656,7 +656,7 @@ int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
 
mutex_lock(&adev->grbm_idx_mutex);
 
-   WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, 

Re: [PATCH 2/2] drm/amdgpu: call ras_debugfs_create_all in debugfs_init

2020-03-10 Thread Alex Deucher
On Mon, Mar 9, 2020 at 5:12 AM Stanley.Yang  wrote:
>
> From: Tao Zhou 
>
> and remove each ras IP's own debugfs creation
>
> Signed-off-by: Tao Zhou 
> Signed-off-by: Stanley.Yang 
> Change-Id: If3d16862afa0d97abad183dd6e60478b34029e95
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 3 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c   | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c| 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c| 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c| 1 -
>  8 files changed, 3 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index deca6343d0cc..1d513e4f9934 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1315,6 +1315,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,
>  DEFINE_SIMPLE_ATTRIBUTE(fops_sclk_set, NULL,
> amdgpu_debugfs_sclk_set, "%llu\n");
>
> +extern void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);

Can we put this in amdgpu_ras.h and include that instead?
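
For illustration, the suggestion amounts to roughly this (a sketch, not part
of the posted patch):

/* in amdgpu_ras.h */
void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);

/* in amdgpu_debugfs.c, replacing the local extern declaration */
#include "amdgpu_ras.h"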


>  int amdgpu_debugfs_init(struct amdgpu_device *adev)
>  {
> int r, i;
> @@ -1387,6 +1388,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
> }
> }
>
> +   amdgpu_ras_debugfs_create_all(adev);
> +
> return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
> ARRAY_SIZE(amdgpu_debugfs_list));
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 7403588684b3..d12bb4a35780 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -565,7 +565,6 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
> int r;
> struct ras_fs_if fs_info = {
> .sysfs_name = "gfx_err_count",
> -   .debugfs_name = "gfx_err_inject",
> };
> struct ras_ih_if ih_info = {
> .cb = amdgpu_gfx_process_ras_data_cb,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
> index 676c48c02d77..ead3dc572ec5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
> @@ -32,7 +32,6 @@ int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev)
> };
> struct ras_fs_if fs_info = {
> .sysfs_name = "mmhub_err_count",
> -   .debugfs_name = "mmhub_err_inject",
> };
>
> if (!adev->mmhub.ras_if) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
> index 7d5c3a9de9ea..6201a5f4b4fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
> @@ -30,7 +30,6 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev)
> };
> struct ras_fs_if fs_info = {
> .sysfs_name = "pcie_bif_err_count",
> -   .debugfs_name = "pcie_bif_err_inject",
> };
>
> if (!adev->nbio.ras_if) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 422cdd1ce3ad..57af4ea8fb58 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1178,7 +1178,6 @@ static void amdgpu_ras_debugfs_remove_all(struct 
> amdgpu_device *adev)
>  static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
>  {
> amdgpu_ras_sysfs_create_feature_node(adev);
> -   amdgpu_ras_debugfs_create_ctrl_node(adev);
>
> return 0;
>  }
> @@ -1882,8 +1881,6 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
> goto interrupt;
> }
>
> -   amdgpu_ras_debugfs_create(adev, fs_info);
> -
> r = amdgpu_ras_sysfs_create(adev, fs_info);
> if (r)
> goto sysfs;
> @@ -1892,7 +1889,6 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
>  cleanup:
> amdgpu_ras_sysfs_remove(adev, ras_block);
>  sysfs:
> -   amdgpu_ras_debugfs_remove(adev, ras_block);
> if (ih_info->cb)
> amdgpu_ras_interrupt_remove_handler(adev, ih_info);
>  interrupt:
> @@ -1909,7 +1905,6 @@ void amdgpu_ras_late_fini(struct amdgpu_device *adev,
> return;
>
> amdgpu_ras_sysfs_remove(adev, ras_block);
> -   amdgpu_ras_debugfs_remove(adev, ras_block);
> if (ih_info->cb)
>  amdgpu_ras_interrupt_remove_handler(adev, ih_info);
> amdgpu_ras_feature_enable(adev, ras_block, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index 7854c053e85d..250a309e4dee 100644
> --- 

Re: [PATCH 2/2] drm/amdgpu: cleanup drm_gpu_scheduler array creation

2020-03-10 Thread Nirmoy


On 3/10/20 2:00 PM, Christian König wrote:

Am 10.03.20 um 13:24 schrieb Nirmoy Das:

Move initialization of struct drm_gpu_scheduler array,
amdgpu_ctx_init_sched() to amdgpu_ring.c.


Moving the code around is a start, but it doesn't buy us much.



Agreed.


We could go for the big cleanup or at least move the individual 
scheduler arrays from the per IP structures into amdgpu_device.c


I will go for the big cleanup by adding priority as param to 
amdgpu_ring_init().





How much time can and do you want to spend on it?

Christian.



Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c    | 75 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h    |  3 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 85 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
  5 files changed, 88 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index fa575bdc03c8..06d151c0fe4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -661,78 +661,3 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr 
*mgr)

  idr_destroy(>ctx_handles);
  mutex_destroy(>lock);
  }
-
-
-static void amdgpu_ctx_init_compute_sched(struct amdgpu_device *adev)
-{
-    int num_compute_sched_normal = 0;
-    int num_compute_sched_high = AMDGPU_MAX_COMPUTE_RINGS - 1;
-    int i;
-
-    /* use one drm sched array, gfx.compute_sched to store both high 
and

- * normal priority drm compute schedulers */
-    for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-    if (!adev->gfx.compute_ring[i].has_high_prio)
- adev->gfx.compute_sched[num_compute_sched_normal++] =
-    >gfx.compute_ring[i].sched;
-    else
- adev->gfx.compute_sched[num_compute_sched_high--] =
-    >gfx.compute_ring[i].sched;
-    }
-
-    /* compute ring only has two priority for now */
-    i = AMDGPU_GFX_PIPE_PRIO_NORMAL;
-    adev->gfx.compute_prio_sched[i] = >gfx.compute_sched[0];
-    adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
-
-    i = AMDGPU_GFX_PIPE_PRIO_HIGH;
-    if (num_compute_sched_high == (AMDGPU_MAX_COMPUTE_RINGS - 1)) {
-    /* When compute has no high priority rings then use */
-    /* normal priority sched array */
-    adev->gfx.compute_prio_sched[i] = >gfx.compute_sched[0];
-    adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
-    } else {
-    adev->gfx.compute_prio_sched[i] =
- >gfx.compute_sched[num_compute_sched_high - 1];
-    adev->gfx.num_compute_sched[i] =
-    adev->gfx.num_compute_rings - num_compute_sched_normal;
-    }
-}
-
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
-{
-    int i, j;
-
-    amdgpu_ctx_init_compute_sched(adev);
-    for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
-    adev->gfx.gfx_sched[i] = >gfx.gfx_ring[i].sched;
-    adev->gfx.num_gfx_sched++;
-    }
-
-    for (i = 0; i < adev->sdma.num_instances; i++) {
-    adev->sdma.sdma_sched[i] = >sdma.instance[i].ring.sched;
-    adev->sdma.num_sdma_sched++;
-    }
-
-    for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-    if (adev->vcn.harvest_config & (1 << i))
-    continue;
- adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
-    >vcn.inst[i].ring_dec.sched;
-    }
-
-    for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-    if (adev->vcn.harvest_config & (1 << i))
-    continue;
-    for (j = 0; j < adev->vcn.num_enc_rings; ++j)
- adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
-    >vcn.inst[i].ring_enc[j].sched;
-    }
-
-    for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
-    if (adev->jpeg.harvest_config & (1 << i))
-    continue;
- adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
-    >jpeg.inst[i].ring_dec.sched;
-    }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index de490f183af2..f54e10314661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -88,7 +88,4 @@ void amdgpu_ctx_mgr_entity_fini(struct 
amdgpu_ctx_mgr *mgr);
  long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long 
timeout);

  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
  -void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
-
-
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 572eb6ea8eab..b2a99f9fc223 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3092,7 +3092,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  adev->gfx.config.max_cu_per_sh,
  adev->gfx.cu_info.number);
  -    amdgpu_ctx_init_sched(adev);
+    amdgpu_ring_init_sched(adev);
    adev->accel_working = true;
  diff --git 

Re: [PATCH 2/2] drm/amdgpu: cleanup drm_gpu_scheduler array creation

2020-03-10 Thread Christian König

Am 10.03.20 um 13:24 schrieb Nirmoy Das:

Move initialization of struct drm_gpu_scheduler array,
amdgpu_ctx_init_sched() to amdgpu_ring.c.


Moving the code around is a start, but it doesn't buy us much.

We could go for the big cleanup or at least move the individual 
scheduler arrays from the per IP structures into amdgpu_device.c


How much time can and do you want to spend on it?

Christian.



Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 75 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h|  3 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 85 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
  5 files changed, 88 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index fa575bdc03c8..06d151c0fe4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -661,78 +661,3 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
idr_destroy(>ctx_handles);
mutex_destroy(>lock);
  }
-
-
-static void amdgpu_ctx_init_compute_sched(struct amdgpu_device *adev)
-{
-   int num_compute_sched_normal = 0;
-   int num_compute_sched_high = AMDGPU_MAX_COMPUTE_RINGS - 1;
-   int i;
-
-   /* use one drm sched array, gfx.compute_sched to store both high and
-* normal priority drm compute schedulers */
-   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   if (!adev->gfx.compute_ring[i].has_high_prio)
-   adev->gfx.compute_sched[num_compute_sched_normal++] =
-   >gfx.compute_ring[i].sched;
-   else
-   adev->gfx.compute_sched[num_compute_sched_high--] =
-   >gfx.compute_ring[i].sched;
-   }
-
-   /* compute ring only has two priority for now */
-   i = AMDGPU_GFX_PIPE_PRIO_NORMAL;
-   adev->gfx.compute_prio_sched[i] = >gfx.compute_sched[0];
-   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
-
-   i = AMDGPU_GFX_PIPE_PRIO_HIGH;
-   if (num_compute_sched_high == (AMDGPU_MAX_COMPUTE_RINGS - 1)) {
-   /* When compute has no high priority rings then use */
-   /* normal priority sched array */
-   adev->gfx.compute_prio_sched[i] = >gfx.compute_sched[0];
-   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
-   } else {
-   adev->gfx.compute_prio_sched[i] =
-   >gfx.compute_sched[num_compute_sched_high - 1];
-   adev->gfx.num_compute_sched[i] =
-   adev->gfx.num_compute_rings - num_compute_sched_normal;
-   }
-}
-
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
-{
-   int i, j;
-
-   amdgpu_ctx_init_compute_sched(adev);
-   for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
-   adev->gfx.gfx_sched[i] = >gfx.gfx_ring[i].sched;
-   adev->gfx.num_gfx_sched++;
-   }
-
-   for (i = 0; i < adev->sdma.num_instances; i++) {
-   adev->sdma.sdma_sched[i] = >sdma.instance[i].ring.sched;
-   adev->sdma.num_sdma_sched++;
-   }
-
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   if (adev->vcn.harvest_config & (1 << i))
-   continue;
-   adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
-   >vcn.inst[i].ring_dec.sched;
-   }
-
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   if (adev->vcn.harvest_config & (1 << i))
-   continue;
-   for (j = 0; j < adev->vcn.num_enc_rings; ++j)
-   adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
-   >vcn.inst[i].ring_enc[j].sched;
-   }
-
-   for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
-   if (adev->jpeg.harvest_config & (1 << i))
-   continue;
-   adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
-   >jpeg.inst[i].ring_dec.sched;
-   }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index de490f183af2..f54e10314661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -88,7 +88,4 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
  long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
  
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev);

-
-
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 572eb6ea8eab..b2a99f9fc223 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ 

Re: [bug report] drm/amd/amdgpu: Add debugfs support for reading GPRs (v2)

2020-03-10 Thread Tom St Denis
Sorry about missing that.  A fix was sent to the list a few mins ago.  
It also highlighted a bug in umr's reading of trap registers.  It's a 
genuine two-fer!


Tom


On 2020-03-10 8:23 a.m., Dan Carpenter wrote:

On Tue, Nov 28, 2017 at 09:37:44AM -0500, Tom St Denis wrote:

On 28/11/17 09:29 AM, Dan Carpenter wrote:

Hello Tom St Denis,

The patch c5a60ce81b49: "drm/amd/amdgpu: Add debugfs support for
reading GPRs (v2)" from Dec 5, 2016, leads to the following static
checker warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:3774 
amdgpu_debugfs_gpr_read()
error: buffer overflow 'data' 1024 <= 4095

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
3731  static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user 
*buf,
3732  size_t size, loff_t *pos)
3733  {
3734  struct amdgpu_device *adev = f->f_inode->i_private;
3735  int r;
3736  ssize_t result = 0;
3737  uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
3738
3739  if (size & 3 || *pos & 3)
3740  return -EINVAL;
3741
3742  /* decode offset */
3743  offset = *pos & GENMASK_ULL(11, 0);
  ^^
offset is set to 0-4095.

3744  se = (*pos & GENMASK_ULL(19, 12)) >> 12;
3745  sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
3746  cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
3747  wave = (*pos & GENMASK_ULL(43, 36)) >> 36;
3748  simd = (*pos & GENMASK_ULL(51, 44)) >> 44;
3749  thread = (*pos & GENMASK_ULL(59, 52)) >> 52;
3750  bank = (*pos & GENMASK_ULL(61, 60)) >> 60;
3751
3752  data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL);
   
data is a 1024 element array

3753  if (!data)
3754  return -ENOMEM;
3755
3756  /* switch to the specific se/sh/cu */
3757  mutex_lock(&adev->grbm_idx_mutex);
3758  amdgpu_gfx_select_se_sh(adev, se, sh, cu);
3759
3760  if (bank == 0) {
3761  if (adev->gfx.funcs->read_wave_vgprs)
3762  adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, 
thread, offset, size>>2, data);
3763  } else {
3764  if (adev->gfx.funcs->read_wave_sgprs)
3765  adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, 
offset, size>>2, data);
3766  }
3767
3768  amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3769  mutex_unlock(&adev->grbm_idx_mutex);
3770
3771  while (size) {
3772  uint32_t value;
3773
3774  value = data[offset++];
  ^^
We're possibly reading beyond the end of the array.  Maybe we are
supposed to divide the offset /= sizeof(*data)?

Hi Dan,


umr only reads from offset zero, but to be consistent I think yes, the offset
should be divided by 4 first; since we ensure it's 4-byte aligned, it's clearly
a 4-byte offset.
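
(For example, a *pos byte offset of 8 has to index data[2], not data[8];
without the division, an offset near the end of the 4 KiB window would index
well past the 1024-dword buffer.)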

Thanks for finding this, I'll craft up a patch shortly.


What ever happened with this?

regards,
dan carpenter




Re: [PATCH] drm/amd/amdgpu: Fix GPR read from debugfs

2020-03-10 Thread Christian König

Am 10.03.20 um 13:53 schrieb Tom St Denis:

The offset into the array was specified in bytes but should
be in terms of 32-bit words.  Also prevent large reads that
would also cause a buffer overread.

Signed-off-by: Tom St Denis 


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c573edf02afc..e0f4ccd91fd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -783,11 +783,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, 
char __user *buf,
ssize_t result = 0;
uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
  
-	if (size & 3 || *pos & 3)

+   if (size > 4096 || size & 3 || *pos & 3)
return -EINVAL;
  
  	/* decode offset */

-   offset = *pos & GENMASK_ULL(11, 0);
+   offset = (*pos & GENMASK_ULL(11, 0)) / 4;
se = (*pos & GENMASK_ULL(19, 12)) >> 12;
sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
cu = (*pos & GENMASK_ULL(35, 28)) >> 28;




[PATCH] drm/amd/amdgpu: Fix GPR read from debugfs

2020-03-10 Thread Tom St Denis
The offset into the array was specified in bytes but should
be in terms of 32-bit words.  Also prevent large reads that
would also cause a buffer overread.

Signed-off-by: Tom St Denis 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c573edf02afc..e0f4ccd91fd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -783,11 +783,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, 
char __user *buf,
ssize_t result = 0;
uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
 
-   if (size & 3 || *pos & 3)
+   if (size > 4096 || size & 3 || *pos & 3)
return -EINVAL;
 
/* decode offset */
-   offset = *pos & GENMASK_ULL(11, 0);
+   offset = (*pos & GENMASK_ULL(11, 0)) / 4;
se = (*pos & GENMASK_ULL(19, 12)) >> 12;
sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
-- 
2.24.1



[PATCH] drm/amdgpu/sriov refine vcn_v2_5_early_init func

2020-03-10 Thread Jack Zhang
refine the assignment for vcn.num_vcn_inst,
vcn.harvest_config, vcn.num_enc_rings in VF

Signed-off-by: Jack Zhang 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 35 ++-
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 2d64ba1..9b22e2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -74,29 +74,30 @@ static int amdgpu_ih_clientid_vcns[] = {
 static int vcn_v2_5_early_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   if (adev->asic_type == CHIP_ARCTURUS) {
-   u32 harvest;
-   int i;
-
-   adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS;
-   for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-   harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING);
-   if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
-   adev->vcn.harvest_config |= 1 << i;
-   }
-
-   if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
-AMDGPU_VCN_HARVEST_VCN1))
-   /* both instances are harvested, disable the block */
-   return -ENOENT;
-   } else
-   adev->vcn.num_vcn_inst = 1;
 
if (amdgpu_sriov_vf(adev)) {
adev->vcn.num_vcn_inst = 2;
adev->vcn.harvest_config = 0;
adev->vcn.num_enc_rings = 1;
} else {
+   if (adev->asic_type == CHIP_ARCTURUS) {
+   u32 harvest;
+   int i;
+
+   adev->vcn.num_vcn_inst = 
VCN25_MAX_HW_INSTANCES_ARCTURUS;
+   for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+   harvest = RREG32_SOC15(UVD, i, 
mmCC_UVD_HARVESTING);
+   if (harvest & 
CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+   adev->vcn.harvest_config |= 1 << i;
+   }
+
+   if (adev->vcn.harvest_config == 
(AMDGPU_VCN_HARVEST_VCN0 |
+   AMDGPU_VCN_HARVEST_VCN1))
+   /* both instances are harvested, disable the 
block */
+   return -ENOENT;
+   } else
+   adev->vcn.num_vcn_inst = 1;
+
adev->vcn.num_enc_rings = 2;
}
 
-- 
2.7.4



Re: [bug report] drm/amd/amdgpu: Add debugfs support for reading GPRs (v2)

2020-03-10 Thread Dan Carpenter
On Tue, Nov 28, 2017 at 09:37:44AM -0500, Tom St Denis wrote:
> On 28/11/17 09:29 AM, Dan Carpenter wrote:
> > Hello Tom St Denis,
> > 
> > The patch c5a60ce81b49: "drm/amd/amdgpu: Add debugfs support for
> > reading GPRs (v2)" from Dec 5, 2016, leads to the following static
> > checker warning:
> > 
> > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:3774 
> > amdgpu_debugfs_gpr_read()
> > error: buffer overflow 'data' 1024 <= 4095
> > 
> > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >3731  static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user 
> > *buf,
> >3732  size_t size, loff_t *pos)
> >3733  {
> >3734  struct amdgpu_device *adev = f->f_inode->i_private;
> >3735  int r;
> >3736  ssize_t result = 0;
> >3737  uint32_t offset, se, sh, cu, wave, simd, thread, bank, 
> > *data;
> >3738
> >3739  if (size & 3 || *pos & 3)
> >3740  return -EINVAL;
> >3741
> >3742  /* decode offset */
> >3743  offset = *pos & GENMASK_ULL(11, 0);
> >  ^^
> > offset is set to 0-4095.
> > 
> >3744  se = (*pos & GENMASK_ULL(19, 12)) >> 12;
> >3745  sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
> >3746  cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
> >3747  wave = (*pos & GENMASK_ULL(43, 36)) >> 36;
> >3748  simd = (*pos & GENMASK_ULL(51, 44)) >> 44;
> >3749  thread = (*pos & GENMASK_ULL(59, 52)) >> 52;
> >3750  bank = (*pos & GENMASK_ULL(61, 60)) >> 60;
> >3751
> >3752  data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL);
> >   
> > data is a 1024 element array
> > 
> >3753  if (!data)
> >3754  return -ENOMEM;
> >3755
> >3756  /* switch to the specific se/sh/cu */
> >3757  mutex_lock(&adev->grbm_idx_mutex);
> >3758  amdgpu_gfx_select_se_sh(adev, se, sh, cu);
> >3759
> >3760  if (bank == 0) {
> >3761  if (adev->gfx.funcs->read_wave_vgprs)
> >3762  adev->gfx.funcs->read_wave_vgprs(adev, 
> > simd, wave, thread, offset, size>>2, data);
> >3763  } else {
> >3764  if (adev->gfx.funcs->read_wave_sgprs)
> >3765  adev->gfx.funcs->read_wave_sgprs(adev, 
> > simd, wave, offset, size>>2, data);
> >3766  }
> >3767
> >3768  amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> >3769  mutex_unlock(&adev->grbm_idx_mutex);
> >3770
> >3771  while (size) {
> >3772  uint32_t value;
> >3773
> >3774  value = data[offset++];
> >  ^^
> > We're possibly reading beyond the end of the array.  Maybe we are
> > supposed to divide the offset /= sizeof(*data)?
> 
> Hi Dan,
> 
> 
> umr only reads from offset zero, but to be consistent I think yes, the offset
> should be divided by 4 first; since we ensure it's 4-byte aligned, it's clearly
> a 4-byte offset.
> 
> Thanks for finding this, I'll craft up a patch shortly.
> 

What ever happened with this?

regards,
dan carpenter



Re: [PATCH 1/2] drm/amdgpu: do not set nil entry in compute_prio_sched

2020-03-10 Thread Christian König

Am 10.03.20 um 13:24 schrieb Nirmoy Das:

If there are no high priority compute queues available then set normal
priority sched array to compute_prio_sched[AMDGPU_GFX_PIPE_PRIO_HIGH]

Signed-off-by: Nirmoy Das 


Reviewed-by: Christian König  for this one.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 15 +++
  1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 3b2370ad1e47..fa575bdc03c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -686,10 +686,17 @@ static void amdgpu_ctx_init_compute_sched(struct 
amdgpu_device *adev)
adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
  
  	i = AMDGPU_GFX_PIPE_PRIO_HIGH;

-   adev->gfx.compute_prio_sched[i] =
-   &adev->gfx.compute_sched[num_compute_sched_high - 1];
-   adev->gfx.num_compute_sched[i] =
-   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   if (num_compute_sched_high == (AMDGPU_MAX_COMPUTE_RINGS - 1)) {
+   /* When compute has no high priority rings then use */
+   /* normal priority sched array */
+   adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
+   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
+   } else {
+   adev->gfx.compute_prio_sched[i] =
+   &adev->gfx.compute_sched[num_compute_sched_high - 1];
+   adev->gfx.num_compute_sched[i] =
+   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   }
  }
  
  void amdgpu_ctx_init_sched(struct amdgpu_device *adev)




[PATCH 2/2] drm/amdgpu: cleanup drm_gpu_scheduler array creation

2020-03-10 Thread Nirmoy Das
Move initialization of struct drm_gpu_scheduler array,
amdgpu_ctx_init_sched() to amdgpu_ring.c.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 75 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h|  3 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 85 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
 5 files changed, 88 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index fa575bdc03c8..06d151c0fe4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -661,78 +661,3 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->lock);
 }
-
-
-static void amdgpu_ctx_init_compute_sched(struct amdgpu_device *adev)
-{
-   int num_compute_sched_normal = 0;
-   int num_compute_sched_high = AMDGPU_MAX_COMPUTE_RINGS - 1;
-   int i;
-
-   /* use one drm sched array, gfx.compute_sched to store both high and
-* normal priority drm compute schedulers */
-   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   if (!adev->gfx.compute_ring[i].has_high_prio)
-   adev->gfx.compute_sched[num_compute_sched_normal++] =
-   &adev->gfx.compute_ring[i].sched;
-   else
-   adev->gfx.compute_sched[num_compute_sched_high--] =
-   &adev->gfx.compute_ring[i].sched;
-   }
-
-   /* compute ring only has two priority for now */
-   i = AMDGPU_GFX_PIPE_PRIO_NORMAL;
-   adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
-   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
-
-   i = AMDGPU_GFX_PIPE_PRIO_HIGH;
-   if (num_compute_sched_high == (AMDGPU_MAX_COMPUTE_RINGS - 1)) {
-   /* When compute has no high priority rings then use */
-   /* normal priority sched array */
-   adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
-   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
-   } else {
-   adev->gfx.compute_prio_sched[i] =
-   &adev->gfx.compute_sched[num_compute_sched_high - 1];
-   adev->gfx.num_compute_sched[i] =
-   adev->gfx.num_compute_rings - num_compute_sched_normal;
-   }
-}
-
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
-{
-   int i, j;
-
-   amdgpu_ctx_init_compute_sched(adev);
-   for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
-   adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched;
-   adev->gfx.num_gfx_sched++;
-   }
-
-   for (i = 0; i < adev->sdma.num_instances; i++) {
-   adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched;
-   adev->sdma.num_sdma_sched++;
-   }
-
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   if (adev->vcn.harvest_config & (1 << i))
-   continue;
-   adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
-   &adev->vcn.inst[i].ring_dec.sched;
-   }
-
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   if (adev->vcn.harvest_config & (1 << i))
-   continue;
-   for (j = 0; j < adev->vcn.num_enc_rings; ++j)
-   adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
-   &adev->vcn.inst[i].ring_enc[j].sched;
-   }
-
-   for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
-   if (adev->jpeg.harvest_config & (1 << i))
-   continue;
-   adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
-   &adev->jpeg.inst[i].ring_dec.sched;
-   }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index de490f183af2..f54e10314661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -88,7 +88,4 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
-
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 572eb6ea8eab..b2a99f9fc223 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3092,7 +3092,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->gfx.config.max_cu_per_sh,
adev->gfx.cu_info.number);
 
-   amdgpu_ctx_init_sched(adev);
+   amdgpu_ring_init_sched(adev);
 
adev->accel_working = true;
 
diff --git 

[PATCH 1/2] drm/amdgpu: do not set nil entry in compute_prio_sched

2020-03-10 Thread Nirmoy Das
If there are no high priority compute queues available then set normal
priority sched array to compute_prio_sched[AMDGPU_GFX_PIPE_PRIO_HIGH]

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 3b2370ad1e47..fa575bdc03c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -686,10 +686,17 @@ static void amdgpu_ctx_init_compute_sched(struct 
amdgpu_device *adev)
adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
 
i = AMDGPU_GFX_PIPE_PRIO_HIGH;
-   adev->gfx.compute_prio_sched[i] =
-   &adev->gfx.compute_sched[num_compute_sched_high - 1];
-   adev->gfx.num_compute_sched[i] =
-   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   if (num_compute_sched_high == (AMDGPU_MAX_COMPUTE_RINGS - 1)) {
+   /* When compute has no high priority rings then use */
+   /* normal priority sched array */
+   adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
+   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
+   } else {
+   adev->gfx.compute_prio_sched[i] =
+   &adev->gfx.compute_sched[num_compute_sched_high - 1];
+   adev->gfx.num_compute_sched[i] =
+   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   }
 }
 
 void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
-- 
2.25.0



Re: [PATCH 1/2] drm/amdgpu: cleanup drm_gpu_scheduler array creation

2020-03-10 Thread Nirmoy


On 3/10/20 12:41 PM, Christian König wrote:

Hi Nirmoy,

you can stick with that for now.

In the long term we should make the priority a parameter of 
amdgpu_ring_init(). And then amdgpu_ring_init() can gather the rings 
by priority and type.


That in turn would make amdgpu_ring_init_sched() and 
amdgpu_ring_init_compute_sched() superfluous.



Yes that would be even better.



But fixing the bug with the NULL pointer dereference should come 
first, everything else is just cleanup and has lower urgency.



I will swap these two patches.


Thanks,

Nirmoy



Regards,
Christian.

Am 10.03.20 um 12:39 schrieb Nirmoy:

Hi Christian,


I think we still need amdgpu_ring.has_high_prio bool. I was thinking 
of using


amdgpu_gfx_is_high_priority_compute_queue() to see if a ring is set 
to high priority


but then I realized we don't support high priority gfx queue on gfx7 
and less.



Regards,

Nirmoy

On 3/10/20 12:27 PM, Nirmoy Das wrote:

Move initialization of struct drm_gpu_scheduler array,
amdgpu_ctx_init_sched() to amdgpu_ring.c.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c    | 68 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h    |  3 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 77 
++

  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
  5 files changed, 80 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index 3b2370ad1e47..06d151c0fe4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -661,71 +661,3 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr 
*mgr)

  idr_destroy(>ctx_handles);
  mutex_destroy(>lock);
  }
-
-
-static void amdgpu_ctx_init_compute_sched(struct amdgpu_device *adev)
-{
-    int num_compute_sched_normal = 0;
-    int num_compute_sched_high = AMDGPU_MAX_COMPUTE_RINGS - 1;
-    int i;
-
-    /* use one drm sched array, gfx.compute_sched to store both 
high and

- * normal priority drm compute schedulers */
-    for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-    if (!adev->gfx.compute_ring[i].has_high_prio)
- adev->gfx.compute_sched[num_compute_sched_normal++] =
-    >gfx.compute_ring[i].sched;
-    else
- adev->gfx.compute_sched[num_compute_sched_high--] =
-    >gfx.compute_ring[i].sched;
-    }
-
-    /* compute ring only has two priority for now */
-    i = AMDGPU_GFX_PIPE_PRIO_NORMAL;
-    adev->gfx.compute_prio_sched[i] = >gfx.compute_sched[0];
-    adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
-
-    i = AMDGPU_GFX_PIPE_PRIO_HIGH;
-    adev->gfx.compute_prio_sched[i] =
- >gfx.compute_sched[num_compute_sched_high - 1];
-    adev->gfx.num_compute_sched[i] =
-    adev->gfx.num_compute_rings - num_compute_sched_normal;
-}
-
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
-{
-    int i, j;
-
-    amdgpu_ctx_init_compute_sched(adev);
-    for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
-    adev->gfx.gfx_sched[i] = >gfx.gfx_ring[i].sched;
-    adev->gfx.num_gfx_sched++;
-    }
-
-    for (i = 0; i < adev->sdma.num_instances; i++) {
-    adev->sdma.sdma_sched[i] = >sdma.instance[i].ring.sched;
-    adev->sdma.num_sdma_sched++;
-    }
-
-    for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-    if (adev->vcn.harvest_config & (1 << i))
-    continue;
- adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
-    >vcn.inst[i].ring_dec.sched;
-    }
-
-    for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-    if (adev->vcn.harvest_config & (1 << i))
-    continue;
-    for (j = 0; j < adev->vcn.num_enc_rings; ++j)
- adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
-    >vcn.inst[i].ring_enc[j].sched;
-    }
-
-    for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
-    if (adev->jpeg.harvest_config & (1 << i))
-    continue;
- adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
-    >jpeg.inst[i].ring_dec.sched;
-    }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index de490f183af2..f54e10314661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -88,7 +88,4 @@ void amdgpu_ctx_mgr_entity_fini(struct 
amdgpu_ctx_mgr *mgr);
  long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long 
timeout);

  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
  -void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
-
-
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 572eb6ea8eab..b2a99f9fc223 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3092,7 +3092,7 @@ int amdgpu_device_init(struct amdgpu_device 
*adev,

  

Re: [PATCH 2/2] drm/amdgpu: fix assigning nil entry in compute_prio_sched

2020-03-10 Thread Christian König

Am 10.03.20 um 12:27 schrieb Nirmoy Das:

If there is no high priority compute queue then set normal
priority sched array to compute_prio_sched[AMDGPU_GFX_PIPE_PRIO_HIGH]


Please move that patch to the beginning of the series since it is a bug fix.

Thanks,
Christian.



Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 16 
  1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 99875dd633e6..01faeb8b4ef2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -478,10 +478,18 @@ static void amdgpu_ring_init_compute_sched(struct 
amdgpu_device *adev)
adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
  
  	i = AMDGPU_GFX_PIPE_PRIO_HIGH;

-   adev->gfx.compute_prio_sched[i] =
-   &adev->gfx.compute_sched[num_compute_sched_high - 1];
-   adev->gfx.num_compute_sched[i] =
-   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   if (num_compute_sched_high == (AMDGPU_MAX_COMPUTE_RINGS - 1)) {
+   /* When compute has no high priority rings then use */
+   /* normal priority sched array */
+   adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
+   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
+   } else {
+
+   adev->gfx.compute_prio_sched[i] =
+   &adev->gfx.compute_sched[num_compute_sched_high - 1];
+   adev->gfx.num_compute_sched[i] =
+   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   }
  }
  
  /**




Re: [PATCH 1/2] drm/amdgpu: cleanup drm_gpu_scheduler array creation

2020-03-10 Thread Christian König

Hi Nirmoy,

you can stick with that for now.

In the long term we should make the priority a parameter of 
amdgpu_ring_init(). And then amdgpu_ring_init() can gather the rings by 
priority and type.


That in turn would make amdgpu_ring_init_sched() and 
amdgpu_ring_init_compute_sched() superfluous.
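
A rough sketch of that direction, for illustration only (the extra hw_prio
parameter and the wrapper are hypothetical, assuming the current five-argument
amdgpu_ring_init(); this is not a posted patch):

/* Hypothetical: pass the ring's pipe priority at init time so the ring code
 * can record it and later gather schedulers by type and priority itself,
 * making a separate amdgpu_ring_init_sched() pass unnecessary.
 */
static int amdgpu_ring_init_prio_sketch(struct amdgpu_device *adev,
					struct amdgpu_ring *ring,
					unsigned int max_dw,
					struct amdgpu_irq_src *irq_src,
					unsigned int irq_type,
					unsigned int hw_prio)
{
	int r = amdgpu_ring_init(adev, ring, max_dw, irq_src, irq_type);

	if (r)
		return r;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
		ring->has_high_prio = (hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH);

	return 0;
}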


But fixing the bug with the NULL pointer dereference should come first, 
everything else is just cleanup and has lower urgency.


Regards,
Christian.

Am 10.03.20 um 12:39 schrieb Nirmoy:

Hi Christian,


I think we still need amdgpu_ring.has_high_prio bool. I was thinking 
of using


amdgpu_gfx_is_high_priority_compute_queue() to see if a ring is set to 
high priority


but then I realized we don't support high priority gfx queue on gfx7 
and less.



Regards,

Nirmoy

On 3/10/20 12:27 PM, Nirmoy Das wrote:

Move initialization of struct drm_gpu_scheduler array,
amdgpu_ctx_init_sched() to amdgpu_ring.c.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c    | 68 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h    |  3 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 77 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
  5 files changed, 80 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index 3b2370ad1e47..06d151c0fe4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -661,71 +661,3 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr 
*mgr)

  idr_destroy(>ctx_handles);
  mutex_destroy(>lock);
  }
-
-
-static void amdgpu_ctx_init_compute_sched(struct amdgpu_device *adev)
-{
-    int num_compute_sched_normal = 0;
-    int num_compute_sched_high = AMDGPU_MAX_COMPUTE_RINGS - 1;
-    int i;
-
-    /* use one drm sched array, gfx.compute_sched to store both high 
and

- * normal priority drm compute schedulers */
-    for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-    if (!adev->gfx.compute_ring[i].has_high_prio)
- adev->gfx.compute_sched[num_compute_sched_normal++] =
-    >gfx.compute_ring[i].sched;
-    else
- adev->gfx.compute_sched[num_compute_sched_high--] =
-    >gfx.compute_ring[i].sched;
-    }
-
-    /* compute ring only has two priority for now */
-    i = AMDGPU_GFX_PIPE_PRIO_NORMAL;
-    adev->gfx.compute_prio_sched[i] = >gfx.compute_sched[0];
-    adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
-
-    i = AMDGPU_GFX_PIPE_PRIO_HIGH;
-    adev->gfx.compute_prio_sched[i] =
-    >gfx.compute_sched[num_compute_sched_high - 1];
-    adev->gfx.num_compute_sched[i] =
-    adev->gfx.num_compute_rings - num_compute_sched_normal;
-}
-
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
-{
-    int i, j;
-
-    amdgpu_ctx_init_compute_sched(adev);
-    for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
-    adev->gfx.gfx_sched[i] = >gfx.gfx_ring[i].sched;
-    adev->gfx.num_gfx_sched++;
-    }
-
-    for (i = 0; i < adev->sdma.num_instances; i++) {
-    adev->sdma.sdma_sched[i] = >sdma.instance[i].ring.sched;
-    adev->sdma.num_sdma_sched++;
-    }
-
-    for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-    if (adev->vcn.harvest_config & (1 << i))
-    continue;
- adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
-    >vcn.inst[i].ring_dec.sched;
-    }
-
-    for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-    if (adev->vcn.harvest_config & (1 << i))
-    continue;
-    for (j = 0; j < adev->vcn.num_enc_rings; ++j)
- adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
-    >vcn.inst[i].ring_enc[j].sched;
-    }
-
-    for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
-    if (adev->jpeg.harvest_config & (1 << i))
-    continue;
- adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
-    >jpeg.inst[i].ring_dec.sched;
-    }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index de490f183af2..f54e10314661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -88,7 +88,4 @@ void amdgpu_ctx_mgr_entity_fini(struct 
amdgpu_ctx_mgr *mgr);
  long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long 
timeout);

  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
  -void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
-
-
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 572eb6ea8eab..b2a99f9fc223 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3092,7 +3092,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  adev->gfx.config.max_cu_per_sh,
  adev->gfx.cu_info.number);
  -    amdgpu_ctx_init_sched(adev);
+    amdgpu_ring_init_sched(adev);
    

Re: [PATCH 1/2] drm/amdgpu: cleanup drm_gpu_scheduler array creation

2020-03-10 Thread Nirmoy

Hi Christian,


I think we still need amdgpu_ring.has_high_prio bool. I was thinking of 
using


amdgpu_gfx_is_high_priority_compute_queue() to see if a ring is set to 
high priority


but then I realized we don't support high priority gfx queue on gfx7 and 
less.



Regards,

Nirmoy

On 3/10/20 12:27 PM, Nirmoy Das wrote:

Move initialization of struct drm_gpu_scheduler array,
amdgpu_ctx_init_sched() to amdgpu_ring.c.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 68 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h|  3 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 77 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
  5 files changed, 80 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 3b2370ad1e47..06d151c0fe4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -661,71 +661,3 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
idr_destroy(>ctx_handles);
mutex_destroy(>lock);
  }
-
-
-static void amdgpu_ctx_init_compute_sched(struct amdgpu_device *adev)
-{
-   int num_compute_sched_normal = 0;
-   int num_compute_sched_high = AMDGPU_MAX_COMPUTE_RINGS - 1;
-   int i;
-
-   /* use one drm sched array, gfx.compute_sched to store both high and
-* normal priority drm compute schedulers */
-   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   if (!adev->gfx.compute_ring[i].has_high_prio)
-   adev->gfx.compute_sched[num_compute_sched_normal++] =
-   >gfx.compute_ring[i].sched;
-   else
-   adev->gfx.compute_sched[num_compute_sched_high--] =
-   >gfx.compute_ring[i].sched;
-   }
-
-   /* compute ring only has two priority for now */
-   i = AMDGPU_GFX_PIPE_PRIO_NORMAL;
-   adev->gfx.compute_prio_sched[i] = >gfx.compute_sched[0];
-   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
-
-   i = AMDGPU_GFX_PIPE_PRIO_HIGH;
-   adev->gfx.compute_prio_sched[i] =
-   >gfx.compute_sched[num_compute_sched_high - 1];
-   adev->gfx.num_compute_sched[i] =
-   adev->gfx.num_compute_rings - num_compute_sched_normal;
-}
-
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
-{
-   int i, j;
-
-   amdgpu_ctx_init_compute_sched(adev);
-   for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
-   adev->gfx.gfx_sched[i] = >gfx.gfx_ring[i].sched;
-   adev->gfx.num_gfx_sched++;
-   }
-
-   for (i = 0; i < adev->sdma.num_instances; i++) {
-   adev->sdma.sdma_sched[i] = >sdma.instance[i].ring.sched;
-   adev->sdma.num_sdma_sched++;
-   }
-
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   if (adev->vcn.harvest_config & (1 << i))
-   continue;
-   adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
-   >vcn.inst[i].ring_dec.sched;
-   }
-
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   if (adev->vcn.harvest_config & (1 << i))
-   continue;
-   for (j = 0; j < adev->vcn.num_enc_rings; ++j)
-   adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
-   >vcn.inst[i].ring_enc[j].sched;
-   }
-
-   for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
-   if (adev->jpeg.harvest_config & (1 << i))
-   continue;
-   adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
-   >jpeg.inst[i].ring_dec.sched;
-   }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index de490f183af2..f54e10314661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -88,7 +88,4 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
  long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
  
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev);

-
-
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 572eb6ea8eab..b2a99f9fc223 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3092,7 +3092,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->gfx.config.max_cu_per_sh,
adev->gfx.cu_info.number);
  
-	amdgpu_ctx_init_sched(adev);

+   amdgpu_ring_init_sched(adev);
  
  	adev->accel_working = true;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 

Re: [PATCH 2/2] drm/amdgpu: fix assigning nil entry in compute_prio_sched

2020-03-10 Thread Nirmoy

Please ignore this stale patch.

On 3/10/20 12:27 PM, Nirmoy Das wrote:

If there is no high priority compute queue then set normal
priority sched array to compute_prio_sched[AMDGPU_GFX_PIPE_PRIO_HIGH]

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 16 
  1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 99875dd633e6..01faeb8b4ef2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -478,10 +478,18 @@ static void amdgpu_ring_init_compute_sched(struct 
amdgpu_device *adev)
adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
  
  	i = AMDGPU_GFX_PIPE_PRIO_HIGH;

-   adev->gfx.compute_prio_sched[i] =
-   &adev->gfx.compute_sched[num_compute_sched_high - 1];
-   adev->gfx.num_compute_sched[i] =
-   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   if (num_compute_sched_high == (AMDGPU_MAX_COMPUTE_RINGS - 1)) {
+   /* When compute has no high priority rings then use */
+   /* normal priority sched array */
+   adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
+   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
+   } else {
+
+   adev->gfx.compute_prio_sched[i] =
+   >gfx.compute_sched[num_compute_sched_high - 1];
+   adev->gfx.num_compute_sched[i] =
+   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   }
  }
  
  /**



[PATCH 2/2] drm/amdgpu: fix assigning nil entry in compute_prio_sched

2020-03-10 Thread Nirmoy Das
If there is no high priority compute queue then set normal
priority sched array to compute_prio_sched[AMDGPU_GFX_PIPE_PRIO_HIGH]

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 99875dd633e6..01faeb8b4ef2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -478,10 +478,18 @@ static void amdgpu_ring_init_compute_sched(struct 
amdgpu_device *adev)
adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
 
i = AMDGPU_GFX_PIPE_PRIO_HIGH;
-   adev->gfx.compute_prio_sched[i] =
-   &adev->gfx.compute_sched[num_compute_sched_high - 1];
-   adev->gfx.num_compute_sched[i] =
-   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   if (num_compute_sched_high == (AMDGPU_MAX_COMPUTE_RINGS - 1)) {
+   /* When compute has no high priority rings then use */
+   /* normal priority sched array */
+   adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
+   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
+   } else {
+
+   adev->gfx.compute_prio_sched[i] =
+   >gfx.compute_sched[num_compute_sched_high - 1];
+   adev->gfx.num_compute_sched[i] =
+   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   }
 }
 
 /**
-- 
2.25.0



[PATCH 2/2] drm/amdgpu: do not set nil entry in compute_prio_sched

2020-03-10 Thread Nirmoy Das
If there are no high priority compute queues available then set normal
priority sched array to compute_prio_sched[AMDGPU_GFX_PIPE_PRIO_HIGH]

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 99875dd633e6..01faeb8b4ef2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -478,10 +478,18 @@ static void amdgpu_ring_init_compute_sched(struct 
amdgpu_device *adev)
adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
 
i = AMDGPU_GFX_PIPE_PRIO_HIGH;
-   adev->gfx.compute_prio_sched[i] =
-   &adev->gfx.compute_sched[num_compute_sched_high - 1];
-   adev->gfx.num_compute_sched[i] =
-   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   if (num_compute_sched_high == (AMDGPU_MAX_COMPUTE_RINGS - 1)) {
+   /* When compute has no high priority rings then use */
+   /* normal priority sched array */
+   adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
+   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
+   } else {
+
+   adev->gfx.compute_prio_sched[i] =
+   >gfx.compute_sched[num_compute_sched_high - 1];
+   adev->gfx.num_compute_sched[i] =
+   adev->gfx.num_compute_rings - num_compute_sched_normal;
+   }
 }
 
 /**
-- 
2.25.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: cleanup drm_gpu_scheduler array creation

2020-03-10 Thread Nirmoy Das
Move the initialization of the struct drm_gpu_scheduler array,
amdgpu_ctx_init_sched(), to amdgpu_ring.c.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 68 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h|  3 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   | 77 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
 5 files changed, 80 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 3b2370ad1e47..06d151c0fe4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -661,71 +661,3 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->lock);
 }
-
-
-static void amdgpu_ctx_init_compute_sched(struct amdgpu_device *adev)
-{
-   int num_compute_sched_normal = 0;
-   int num_compute_sched_high = AMDGPU_MAX_COMPUTE_RINGS - 1;
-   int i;
-
-   /* use one drm sched array, gfx.compute_sched to store both high and
-* normal priority drm compute schedulers */
-   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   if (!adev->gfx.compute_ring[i].has_high_prio)
-   adev->gfx.compute_sched[num_compute_sched_normal++] =
-   &adev->gfx.compute_ring[i].sched;
-   else
-   adev->gfx.compute_sched[num_compute_sched_high--] =
-   &adev->gfx.compute_ring[i].sched;
-   }
-
-   /* compute ring only has two priority for now */
-   i = AMDGPU_GFX_PIPE_PRIO_NORMAL;
-   adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
-   adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
-
-   i = AMDGPU_GFX_PIPE_PRIO_HIGH;
-   adev->gfx.compute_prio_sched[i] =
-   &adev->gfx.compute_sched[num_compute_sched_high - 1];
-   adev->gfx.num_compute_sched[i] =
-   adev->gfx.num_compute_rings - num_compute_sched_normal;
-}
-
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
-{
-   int i, j;
-
-   amdgpu_ctx_init_compute_sched(adev);
-   for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
-   adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched;
-   adev->gfx.num_gfx_sched++;
-   }
-
-   for (i = 0; i < adev->sdma.num_instances; i++) {
-   adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched;
-   adev->sdma.num_sdma_sched++;
-   }
-
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   if (adev->vcn.harvest_config & (1 << i))
-   continue;
-   adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
-   &adev->vcn.inst[i].ring_dec.sched;
-   }
-
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   if (adev->vcn.harvest_config & (1 << i))
-   continue;
-   for (j = 0; j < adev->vcn.num_enc_rings; ++j)
-   adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
-   &adev->vcn.inst[i].ring_enc[j].sched;
-   }
-
-   for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
-   if (adev->jpeg.harvest_config & (1 << i))
-   continue;
-   adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
-   &adev->jpeg.inst[i].ring_dec.sched;
-   }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index de490f183af2..f54e10314661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -88,7 +88,4 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
-
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 572eb6ea8eab..b2a99f9fc223 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3092,7 +3092,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->gfx.config.max_cu_per_sh,
adev->gfx.cu_info.number);
 
-   amdgpu_ctx_init_sched(adev);
+   amdgpu_ring_init_sched(adev);
 
adev->accel_working = true;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index a7e1d0425ed0..99875dd633e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -454,3 +454,80 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
ring->sched.ready = !r;
return r;
 }
+
+static void amdgpu_ring_init_compute_sched(struct amdgpu_device *adev)

RE: [PATCH] drm/amdgpu: resolve failed error inject msg

2020-03-10 Thread Chen, Guchun
[AMD Public Use]

There are spelling typos in the commit message. With the typos below fixed, the patch is: 
Reviewed-by: Guchun Chen 

invoking an error injection succesfully will cause an at_event intterupt that
will occur before the invoke sequence can complete causing an invalid error

succesfully --> successfully
intterupt --> interrupt

Regards,
Guchun

From: Zhang, Hawking 
Sent: Tuesday, March 10, 2020 4:56 PM
To: Clements, John ; amd-gfx@lists.freedesktop.org; 
Chen, Guchun ; Li, Dennis ; Li, Candice 

Subject: RE: [PATCH] drm/amdgpu: resolve failed error inject msg


[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Hawking Zhang <hawking.zh...@amd.com>

Regards,
Hawking
From: Clements, John <john.cleme...@amd.com>
Sent: Tuesday, March 10, 2020 16:42
To: amd-gfx@lists.freedesktop.org; Zhang, Hawking <hawking.zh...@amd.com>;
Chen, Guchun <guchun.c...@amd.com>; Li, Dennis <dennis...@amd.com>;
Li, Candice <candice...@amd.com>
Subject: [PATCH] drm/amdgpu: resolve failed error inject msg


[AMD Official Use Only - Internal Distribution Only]

Submitting a patch to resolve an issue where, during a successful error injection,
the associated at_event interrupt causes a false negative and outputs an error
message in the kernel log.

Thank you,
John Clements
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: resolve failed error inject msg

2020-03-10 Thread Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
From: Clements, John 
Sent: Tuesday, March 10, 2020 16:42
To: amd-gfx@lists.freedesktop.org; Zhang, Hawking ; 
Chen, Guchun ; Li, Dennis ; Li, Candice 

Subject: [PATCH] drm/amdgpu: resolve failed error inject msg


[AMD Official Use Only - Internal Distribution Only]

Submitting a patch to resolve an issue where, during a successful error injection,
the associated at_event interrupt causes a false negative and outputs an error
message in the kernel log.

Thank you,
John Clements
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: resolve failed error inject msg

2020-03-10 Thread Clements, John
[AMD Official Use Only - Internal Distribution Only]

Submitting a patch to resolve an issue where, during a successful error injection,
the associated at_event interrupt causes a false negative and outputs an error
message in the kernel log.

Thank you,
John Clements


0001-drm-amdgpu-resolve-failed-error-inject-msg.patch
Description: 0001-drm-amdgpu-resolve-failed-error-inject-msg.patch
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu: call ras_debugfs_create_all in debugfs_init

2020-03-10 Thread Chen, Guchun
[AMD Public Use]

That's fine. These two patches are:

Reviewed-by: Guchun Chen 

Regards,
Guchun

-Original Message-
From: Zhou1, Tao  
Sent: Monday, March 9, 2020 6:15 PM
To: Chen, Guchun ; Yang, Stanley ; 
amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Li, Dennis ; 
Clements, John ; Yang, Stanley 
Subject: RE: [PATCH 2/2] drm/amdgpu: call ras_debugfs_create_all in debugfs_init

[AMD Public Use]

We already have amdgpu_ras_debugfs_remove and amdgpu_ras_debugfs_remove_all.
In fact, amdgpu_debugfs_fini has been dropped in the patch below, and we are 
also planning to delete amdgpu_ras_debugfs_remove and 
amdgpu_ras_debugfs_remove_all.

drm/amdgpu: no need to clean debugfs at amdgpu

drm_minor_unregister will invoke drm_debugfs_cleanup to clean all the child 
nodes under the primary minor node.
We don't need to invoke amdgpu_debugfs_fini and amdgpu_debugfs_regs_cleanup to 
clean again.
Otherwise, it will raise a NULL pointer dereference like below.

v2: remove all debugfs cleanup/fini code at amdgpu

Signed-off-by: Yintian Tao 
Reviewed-by: Alex Deucher 

Regards,
Tao

> -Original Message-
> From: Chen, Guchun 
> Sent: 2020年3月9日 17:21
> To: Yang, Stanley ; 
> amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking ; Li, Dennis 
> ; Clements, John ; Zhou1, 
> Tao ; Yang, Stanley 
> Subject: RE: [PATCH 2/2] drm/amdgpu: call ras_debugfs_create_all in 
> debugfs_init
> 
> [AMD Public Use]
> 
> Do we need additional code to remove the ras debugfs nodes in debugfs_fini to 
> avoid a leak?
> 
> Regards,
> Guchun
> 
> -Original Message-
> From: Stanley.Yang 
> Sent: Monday, March 9, 2020 5:12 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking ; Chen, Guchun 
> ; Li, Dennis ; Clements, John 
> ; Zhou1, Tao ; Yang, Stanley 
> 
> Subject: [PATCH 2/2] drm/amdgpu: call ras_debugfs_create_all in 
> debugfs_init
> 
> From: Tao Zhou 
> 
> and remove each ras IP's own debugfs creation
> 
> Signed-off-by: Tao Zhou 
> Signed-off-by: Stanley.Yang 
> Change-Id: If3d16862afa0d97abad183dd6e60478b34029e95
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 3 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c   | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c| 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c| 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c| 1 -
>  8 files changed, 3 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index deca6343d0cc..1d513e4f9934 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1315,6 +1315,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,
>  DEFINE_SIMPLE_ATTRIBUTE(fops_sclk_set, NULL,
>   amdgpu_debugfs_sclk_set, "%llu\n");
> 
> +extern void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);
>  int amdgpu_debugfs_init(struct amdgpu_device *adev)
>  {
>   int r, i;
> @@ -1387,6 +1388,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
>   }
>   }
> 
> + amdgpu_ras_debugfs_create_all(adev);
> +
>   return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
>   ARRAY_SIZE(amdgpu_debugfs_list));
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 7403588684b3..d12bb4a35780 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -565,7 +565,6 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
>   int r;
>   struct ras_fs_if fs_info = {
>   .sysfs_name = "gfx_err_count",
> - .debugfs_name = "gfx_err_inject",
>   };
>   struct ras_ih_if ih_info = {
>   .cb = amdgpu_gfx_process_ras_data_cb,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
> index 676c48c02d77..ead3dc572ec5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
> @@ -32,7 +32,6 @@ int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev)
>   };
>   struct ras_fs_if fs_info = {
>   .sysfs_name = "mmhub_err_count",
> - .debugfs_name = "mmhub_err_inject",
>   };
> 
>   if (!adev->mmhub.ras_if) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
> index 7d5c3a9de9ea..6201a5f4b4fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
> @@ -30,7 +30,6 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev)
>   };
>   struct ras_fs_if fs_info = {
>   .sysfs_name = "pcie_bif_err_count",
> - .debugfs_name = "pcie_bif_err_inject",
>   };
> 
>  
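
For context, a small user-space sketch (plain C) of the centralization pattern this thread converges on: instead of every IP block creating its own error-inject debugfs node from its own ras_late_init, blocks only describe themselves and a single helper creates every node from the debugfs init path, so cleanup can likewise be left to one place (or, as noted above, to the DRM core's cleanup of the minor). The ras_block structure and the ras_debugfs_create_all() name below are illustrative assumptions, not the actual amdgpu types.

/* centralize.c - one helper creates all per-block debug nodes */
#include <stdio.h>

struct ras_block {
        const char *name;       /* e.g. "gfx", "mmhub", "pcie_bif" */
        int ras_enabled;        /* only enabled blocks get a node */
};

/* Stand-in for a single create-all walk done from the debugfs init hook. */
static void ras_debugfs_create_all(const struct ras_block *blocks, int n)
{
        for (int i = 0; i < n; i++) {
                if (!blocks[i].ras_enabled)
                        continue;
                /* In the driver this would be a debugfs_create_file() call. */
                printf("created ras/%s_err_inject\n", blocks[i].name);
        }
}

int main(void)
{
        const struct ras_block blocks[] = {
                { "gfx", 1 }, { "mmhub", 1 }, { "pcie_bif", 0 },
        };

        /* Called once, instead of once per IP block. */
        ras_debugfs_create_all(blocks, 3);
        return 0;
}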

RE: [PATCH] drm/amdgpu: check for the existence of RAS dir before creating

2020-03-10 Thread Yang, Stanley
[AMD Official Use Only - Internal Distribution Only]

centralize all debugfs creation in one place for ras

Signed-off-by: Tao Zhou 
Signed-off-by: Stanley.Yang 
Change-Id: I7489ccb41dcf7a11ecc45313ad42940474999d81

Patches have been pushed to the branch.

Regards,
Stanley

-Original Message-
From: Zhou1, Tao  
Sent: Tuesday, March 10, 2020 1:39 PM
To: Quan, Evan ; amd-gfx@lists.freedesktop.org; Yang, 
Stanley 
Cc: Quan, Evan ; Zhang, Hawking 
Subject: RE: [PATCH] drm/amdgpu: check for the existence of RAS dir before 
creating

[AMD Official Use Only - Internal Distribution Only]

The issue is fixed by:

centralize all debugfs creation in one place for ras

Signed-off-by: Tao Zhou 
Signed-off-by: Stanley.Yang 
Change-Id: I7489ccb41dcf7a11ecc45313ad42940474999d81

Hi Stanley:

Have you pushed the patch?

Regards,
Tao

> -Original Message-
> From: amd-gfx  On Behalf Of 
> Evan Quan
> Sent: 2020年3月10日 13:26
> To: amd-gfx@lists.freedesktop.org
> Cc: Quan, Evan ; Zhang, Hawking 
> 
> Subject: [PATCH] drm/amdgpu: check for the existence of RAS dir before 
> creating
> 
> To address the error message below:
> debugfs: Directory 'ras' with parent '/' already present!
> 
> Change-Id: I2539e89fdfe4e22055c3be5a48a8c0adad315f91
> Signed-off-by: Evan Quan 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index e1e8fd4b2b89..2195f6c63b50 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1075,7 +1075,9 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
>   struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
>   struct drm_minor *minor = adev->ddev->primary;
> 
> - con->dir = debugfs_create_dir("ras", minor->debugfs_root);
> + con->dir = debugfs_lookup("ras", minor->debugfs_root);
> + if (!con->dir)
> + con->dir = debugfs_create_dir("ras", minor->debugfs_root);
>   debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
>   adev, &amdgpu_ras_debugfs_ctrl_ops);
>   debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir,
> --
> 2.25.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
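
As a closing illustration, a user-space analogue (plain C, POSIX) of the check-before-create idiom the last patch applies to the "ras" directory, with stat()/mkdir() standing in for debugfs_lookup()/debugfs_create_dir(). The ensure_dir() helper and the /tmp/ras path are made up for the example; the point is simply that a second caller reuses what the first one created instead of tripping over "already present".

/* ensure_dir.c - reuse an existing directory, create it only if missing */
#include <stdio.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/types.h>

static int ensure_dir(const char *path)
{
        struct stat st;

        if (stat(path, &st) == 0 && S_ISDIR(st.st_mode))
                return 0;               /* already there: reuse it */

        if (mkdir(path, 0755) == 0)
                return 0;               /* created it ourselves */

        return -errno;                  /* genuine failure */
}

int main(void)
{
        /* Both calls succeed; the second one just reuses the directory. */
        printf("first:  %d\n", ensure_dir("/tmp/ras"));
        printf("second: %d\n", ensure_dir("/tmp/ras"));
        return 0;
}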