[PATCH] drm/amdgpu/pm: display vcn pp dpm (v4)

2021-05-19 Thread David M Nieto
Enable displaying DPM levels for VCN clocks
in swsmu supported ASICs
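
For reference, once applied the new levels show up as additional sysfs DPM
files; a minimal userspace check (illustrative: assumes card0 and that the
ASIC exposes the pp_dpm_vclk/pp_dpm_dclk nodes):

#include <stdio.h>

static void dump_levels(const char *path)
{
	char line[128];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* one "N: freqMhz [*]" line per DPM level */
	fclose(f);
}

int main(void)
{
	dump_levels("/sys/class/drm/card0/device/pp_dpm_vclk");
	dump_levels("/sys/class/drm/card0/device/pp_dpm_dclk");
	return 0;
}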

v2: removed set functions for navi, renoir
v3: removed set function from arcturus
v4: added missing defines in drm_table and removed
 unneeded goto label in navi10_ppt.c

Signed-off-by: David M Nieto 
Reviewed-by: Lijo Lazar 
Change-Id: I01959a97b02aa87a6deb4a89010858cc93838cd7
---
 drivers/gpu/drm/amd/display/dc/dm_pp_smu.h|  4 ++
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 46 +++
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 10 +---
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   |  4 ++
 .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   | 33 +
 .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c| 46 +++
 6 files changed, 135 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h 
b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index fb41140e8381..4440d08743aa 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -245,6 +245,8 @@ struct pp_smu_funcs_nv {
 #define PP_SMU_NUM_DCFCLK_DPM_LEVELS  8
 #define PP_SMU_NUM_FCLK_DPM_LEVELS4
 #define PP_SMU_NUM_MEMCLK_DPM_LEVELS  4
+#define PP_SMU_NUM_DCLK_DPM_LEVELS8
+#define PP_SMU_NUM_VCLK_DPM_LEVELS8
 
 struct dpm_clock {
   uint32_t  Freq;// In MHz
@@ -258,6 +260,8 @@ struct dpm_clocks {
struct dpm_clock SocClocks[PP_SMU_NUM_SOCCLK_DPM_LEVELS];
struct dpm_clock FClocks[PP_SMU_NUM_FCLK_DPM_LEVELS];
struct dpm_clock MemClocks[PP_SMU_NUM_MEMCLK_DPM_LEVELS];
+   struct dpm_clock VClocks[PP_SMU_NUM_VCLK_DPM_LEVELS];
+   struct dpm_clock DClocks[PP_SMU_NUM_DCLK_DPM_LEVELS];
 };
 
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 77693bf0840c..1735a96dd307 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -822,6 +822,52 @@ static int arcturus_print_clk_levels(struct smu_context 
*smu,
now) ? "*" : ""));
break;
 
+   case SMU_VCLK:
+   ret = arcturus_get_current_clk_freq_by_table(smu, SMU_VCLK, &now);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get current vclk Failed!");
+   return ret;
+   }
+
+   single_dpm_table = &(dpm_context->dpm_tables.vclk_table);
+   ret = arcturus_get_clk_table(smu, &clocks, single_dpm_table);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get vclk levels Failed!");
+   return ret;
+   }
+
+   for (i = 0; i < single_dpm_table->count; i++)
+   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   i, single_dpm_table->dpm_levels[i].value,
+   (clocks.num_levels == 1) ? "*" :
+   (arcturus_freqs_in_same_level(
+   clocks.data[i].clocks_in_khz / 1000,
+   now) ? "*" : ""));
+   break;
+
+   case SMU_DCLK:
+   ret = arcturus_get_current_clk_freq_by_table(smu, SMU_DCLK, &now);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get current dclk Failed!");
+   return ret;
+   }
+
+   single_dpm_table = &(dpm_context->dpm_tables.dclk_table);
+   ret = arcturus_get_clk_table(smu, &clocks, single_dpm_table);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get dclk levels Failed!");
+   return ret;
+   }
+
+   for (i = 0; i < single_dpm_table->count; i++)
+   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   i, single_dpm_table->dpm_levels[i].value,
+   (clocks.num_levels == 1) ? "*" :
+   (arcturus_freqs_in_same_level(
+   clocks.data[i].clocks_in_khz / 1000,
+   now) ? "*" : ""));
+   break;
+
case SMU_PCIE:
gen_speed = smu_v11_0_get_current_pcie_link_speed_level(smu);
lane_width = smu_v11_0_get_current_pcie_link_width_level(smu);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 770b181239a3..78fe13183e8b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1273,6 +1273,8 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
case SMU_MCLK:
case SMU_UCLK:
case SMU_FCLK:
+   case SMU_VCLK:
+   case SMU_DCLK:
case SMU_DCEFCLK:
ret = navi10_get_current_clk_freq_by_table(smu, clk_type, 

Re: [PATCH v5 03/10] drm/amdgpu: Modify GC register access from MMIO to RLCG in file kfd_v10*

2021-05-19 Thread Felix Kuehling
On 2021-05-17 at 10:39 a.m., Peng Ju Zhou wrote:
> In an SRIOV environment, KMD should access GC registers
> with RLCG if the GC indirect access flag is enabled.
>
> Signed-off-by: Peng Ju Zhou 

This patch is

Reviewed-by: Felix Kuehling 
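
For readers following the conversion: the patch swaps raw MMIO writes for the
SOC15 wrappers, which can route a GC register access through RLCG when the VF
is not allowed to touch the register directly. A hedged sketch of the dispatch
this implies (the two example_* helpers are hypothetical stand-ins, not the
real macro expansion):

	static void example_gc_write(struct amdgpu_device *adev,
				     uint32_t reg, uint32_t value)
	{
		if (amdgpu_sriov_vf(adev) && example_gc_indirect_enabled(adev))
			example_rlcg_write(adev, reg, value); /* indirect, via RLCG */
		else
			WREG32(reg, value); /* plain MMIO on bare metal */
	}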


> ---
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 42 +--
>  1 file changed, 21 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 62aa1a6f64ed..491acdf92f73 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -96,8 +96,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev 
> *kgd, uint32_t vmid,
>  
>   lock_srbm(kgd, 0, 0, 0, vmid);
>  
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
> + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
> + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
>   /* APE1 no longer exists on GFX9 */
>  
>   unlock_srbm(kgd);
> @@ -161,7 +161,7 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, 
> uint32_t pipe_id)
>  
>   lock_srbm(kgd, mec, pipe, 0, 0);
>  
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
> + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
>   CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
>   CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
>  
> @@ -239,13 +239,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
> uint32_t pipe_id,
>  
>   for (reg = hqd_base;
>reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
> - WREG32(reg, mqd_hqd[reg - hqd_base]);
> + WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]);
>  
>  
>   /* Activate doorbell logic before triggering WPTR poll. */
>   data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
>CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
> + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
>  
>   if (wptr) {
>   /* Don't read wptr with get_user because the user
> @@ -274,27 +274,27 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, 
> uint32_t pipe_id,
>   guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
>   guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
>  
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
> + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
>  lower_32_bits(guessed_wptr));
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
> + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
>  upper_32_bits(guessed_wptr));
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
> + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
>  lower_32_bits((uint64_t)wptr));
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
> + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
>  upper_32_bits((uint64_t)wptr));
>   pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
>(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
> + WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
>  (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
>   }
>  
>   /* Start the EOP fetcher */
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
> + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_RPTR,
>  REG_SET_FIELD(m->cp_hqd_eop_rptr,
>CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
>  
>   data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
> - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
> + WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);
>  
>   release_queue(kgd);
>  
> @@ -365,7 +365,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd,
>   if (WARN_ON_ONCE(i >= HQD_N_REGS))  \
>   break;  \
>   (*dump)[i][0] = (addr) << 2;\
> - (*dump)[i++][1] = RREG32(addr); \
> + (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr);\
>   } while (0)
>  
>   *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
> @@ -497,13 +497,13 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, 
> uint64_t queue_address,
>   uint32_t low, high;
>  
>   acquire_queue(kgd, pipe_id, queue_id);
> - act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
> + act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
>   if (act) {
>   low = lower_32_bits(queue_address >> 8);
> 

RE: [PATCH] drm/amd/pm: correct MGpuFanBoost setting

2021-05-19 Thread Quan, Evan
Thanks. I will check and port this to powerplay code if necessary.

BR
Evan
> -Original Message-
> From: amd-gfx  On Behalf Of Alex
> Deucher
> Sent: Wednesday, May 19, 2021 10:13 AM
> To: Feng, Kenneth 
> Cc: Deucher, Alexander ; Quan, Evan
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amd/pm: correct MGpuFanBoost setting
> 
> Do we need a similar check in the vega powerplay code?
> 
> Alex
> 
> 
> On Mon, May 17, 2021 at 10:59 PM Feng, Kenneth
>  wrote:
> >
> > [AMD Official Use Only]
> >
> > Reviewed-by: Kenneth Feng 
> >
> > -Original Message-
> > From: Quan, Evan 
> > Sent: Tuesday, May 18, 2021 10:05 AM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Deucher, Alexander ; Feng, Kenneth
> > ; Quan, Evan 
> > Subject: [PATCH] drm/amd/pm: correct MGpuFanBoost setting
> >
> > No MGpuFanBoost setting for those ASICs which do not support it.
> > Otherwise, it may break their fan control feature.
> >
> > Change-Id: Ifa9c87ac537a07937a0f0f6a670f21368eb29218
> > Signed-off-by: Evan Quan 
> > ---
> >  drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c|  9
> +
> >  .../gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c| 10
> ++
> >  2 files changed, 19 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> > b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> > index d2063b1e7936..f16c76038f13 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> > @@ -2936,6 +2936,8 @@ static ssize_t navi1x_get_gpu_metrics(struct
> > smu_context *smu,
> >
> >  static int navi10_enable_mgpu_fan_boost(struct smu_context *smu)  {
> > +   struct smu_table_context *table_context = &smu->smu_table;
> > +   PPTable_t *smc_pptable = table_context->driver_pptable;
> > struct amdgpu_device *adev = smu->adev;
> > uint32_t param = 0;
> >
> > @@ -2943,6 +2945,13 @@ static int navi10_enable_mgpu_fan_boost(struct
> smu_context *smu)
> > if (adev->asic_type == CHIP_NAVI12)
> > return 0;
> >
> > +   /*
> > +* Skip the MGpuFanBoost setting for those ASICs
> > +* which do not support it
> > +*/
> > +   if (!smc_pptable->MGpuFanBoostLimitRpm)
> > +   return 0;
> > +
> > /* Workaround for WS SKU */
> > if (adev->pdev->device == 0x7312 &&
> > adev->pdev->revision == 0) diff --git
> > a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> > b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> > index 3c3a7f9233e0..159cd698323e 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> > @@ -3201,6 +3201,16 @@ static ssize_t
> > sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
> >
> >  static int sienna_cichlid_enable_mgpu_fan_boost(struct smu_context
> > *smu)  {
> > +   struct smu_table_context *table_context = &smu->smu_table;
> > +   PPTable_t *smc_pptable = table_context->driver_pptable;
> > +
> > +   /*
> > +* Skip the MGpuFanBoost setting for those ASICs
> > +* which do not support it
> > +*/
> > +   if (!smc_pptable->MGpuFanBoostLimitRpm)
> > +   return 0;
> > +
> > return smu_cmn_send_smc_msg_with_param(smu,
> >
> > SMU_MSG_SetMGpuFanBoostLimitRpm,
> >0,
> > --
> > 2.29.0


Re: [PATCH] drm/amdgpu: Add early fini callback

2021-05-19 Thread Andrey Grodzovsky


On 2021-05-19 11:29 p.m., Felix Kuehling wrote:

On 2021-05-19 at 11:20 p.m., Andrey Grodzovsky wrote:

Use it to call display code dependent on device->drv_data
before it's set to NULL on device unplug

v5:
Move HW finalization into this callback to prevent MMIO accesses
post pci remove.

v7:
Split kfd suspend from device exit to expedite HW related
stuff to amdgpu_pci_remove

v8:
Squash previous KFD commit into this commit to avoid compile break.

Signed-off-by: Andrey Grodzovsky 
Acked-by: Christian König 

See one cosmetic comment inline. With that fixed the patch is

Reviewed-by: Felix Kuehling 



Thanks for the quick response, updated.
Since this was the last commit to review, I also pushed the series to
drm-misc-next.

Andrey
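
For other IP blocks picking this up, the shape of the new hook looks roughly
like the sketch below (only the early_fini member itself comes from this
patch; the example_* names are illustrative):

	static int example_ip_early_fini(void *handle)
	{
		struct amdgpu_device *adev = (struct amdgpu_device *)handle;

		/* display/KFD teardown that still dereferences drv_data
		 * belongs here, before it is cleared on unplug */
		dev_dbg(adev->dev, "early fini\n");
		return 0;
	}

	static const struct amd_ip_funcs example_ip_funcs = {
		.name = "example",
		.early_fini = example_ip_early_fini,
		/* ...usual early_init/hw_fini/sw_fini callbacks... */
	};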





---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 59 +--
  drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  3 +-
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++-
  drivers/gpu/drm/amd/include/amd_shared.h  |  2 +
  6 files changed, 56 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 5f6696a3c778..2b06dee9a0ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -170,7 +170,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
}
  }
  
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
  {
if (adev->kfd.dev) {
kgd2kfd_device_exit(adev->kfd.dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 5ffb07b02810..d8a537e8aea5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -127,7 +127,7 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry);
  void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
  void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
  int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8bee95ad32d9..bc75e35dd8d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2558,34 +2558,26 @@ static int amdgpu_device_ip_late_init(struct 
amdgpu_device *adev)
return 0;
  }
  
-/**
- * amdgpu_device_ip_fini - run fini for hardware IPs
- *
- * @adev: amdgpu_device pointer
- *
- * Main teardown pass for hardware IPs.  The list of all the hardware
- * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
- * are run.  hw_fini tears down the hardware associated with each IP
- * and sw_fini tears down any software state associated with each IP.
- * Returns 0 on success, negative error code on failure.
- */
-static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
+static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
  {
int i, r;
  
-	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
-   amdgpu_virt_release_ras_err_handler_data(adev);
+   for (i = 0; i < adev->num_ip_blocks; i++) {
+   if (!adev->ip_blocks[i].version->funcs->early_fini)
+   continue;
  
-	amdgpu_ras_pre_fini(adev);
+   r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
+   if (r) {
+   DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+   }
+   }
  
-	if (adev->gmc.xgmi.num_physical_nodes > 1)
-   amdgpu_xgmi_remove_device(adev);
+   amdgpu_amdkfd_suspend(adev, false);
  
  	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
  
-	amdgpu_amdkfd_device_fini(adev);
-
/* need to disable SMC first */
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.hw)
@@ -2616,6 +2608,33 @@ static int amdgpu_device_ip_fini(struct amdgpu_device 
*adev)
adev->ip_blocks[i].status.hw = false;
}
  
+	return 0;
+}
+
+/**
+ * amdgpu_device_ip_fini - run fini for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main teardown pass for hardware IPs.  The list of all the hardware
+ * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
+ * are run.  hw_fini tears down 

[PATCH 1/5] amdgpu/pm: reorder definition of swsmu_pm_funcs for readability

2021-05-19 Thread Darren Powell
Match the order of definition to the structure's declaration to
help with locating included and missing functions of the API

Signed-off-by: Darren Powell 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 48 +++
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 285849cef9f2..8aff67a667fa 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2962,6 +2962,8 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
.get_fan_control_mode= smu_get_fan_control_mode,
.set_fan_speed_percent   = smu_set_fan_speed_percent,
.get_fan_speed_percent   = smu_get_fan_speed_percent,
+   .force_clock_level   = smu_force_ppclk_levels,
+   .print_clock_levels  = smu_print_ppclk_levels,
.force_performance_level = smu_force_performance_level,
.read_sensor = smu_read_sensor,
.get_performance_level   = smu_get_performance_level,
@@ -2974,38 +2976,36 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
.switch_power_profile= smu_switch_power_profile,
/* export to amdgpu */
.dispatch_tasks  = smu_handle_dpm_task,
+   .load_firmware   = smu_load_microcode,
.set_powergating_by_smu  = smu_dpm_set_power_gate,
.set_power_limit = smu_set_power_limit,
+   .get_power_profile_mode  = smu_get_power_profile_mode,
+   .set_power_profile_mode  = smu_set_power_profile_mode,
.odn_edit_dpm_table  = smu_od_edit_dpm_table,
.set_mp1_state   = smu_set_mp1_state,
+   .gfx_state_change_set= smu_gfx_state_change_set,
/* export to DC */
-   .get_sclk= smu_get_sclk,
-   .get_mclk= smu_get_mclk,
-   .enable_mgpu_fan_boost   = smu_enable_mgpu_fan_boost,
-   .get_asic_baco_capability = smu_get_baco_capability,
-   .set_asic_baco_state = smu_baco_set_state,
-   .get_ppfeature_status= smu_sys_get_pp_feature_mask,
-   .set_ppfeature_status= smu_sys_set_pp_feature_mask,
-   .asic_reset_mode_2   = smu_mode2_reset,
-   .set_df_cstate   = smu_set_df_cstate,
-   .set_xgmi_pstate = smu_set_xgmi_pstate,
-   .get_gpu_metrics = smu_sys_get_gpu_metrics,
-   .set_power_profile_mode  = smu_set_power_profile_mode,
-   .get_power_profile_mode  = smu_get_power_profile_mode,
-   .force_clock_level   = smu_force_ppclk_levels,
-   .print_clock_levels  = smu_print_ppclk_levels,
-   .get_uclk_dpm_states = smu_get_uclk_dpm_states,
-   .get_dpm_clock_table = smu_get_dpm_clock_table,
-   .display_configuration_change= smu_display_configuration_change,
-   .get_clock_by_type_with_latency  = 
smu_get_clock_by_type_with_latency,
-   .display_clock_voltage_request   = 
smu_display_clock_voltage_request,
-   .set_active_display_count= smu_set_display_count,
-   .set_min_deep_sleep_dcefclk  = smu_set_deep_sleep_dcefclk,
+   .get_sclk = smu_get_sclk,
+   .get_mclk = smu_get_mclk,
+   .display_configuration_change = smu_display_configuration_change,
+   .get_clock_by_type_with_latency   = smu_get_clock_by_type_with_latency,
+   .display_clock_voltage_request= smu_display_clock_voltage_request,
+   .enable_mgpu_fan_boost= smu_enable_mgpu_fan_boost,
+   .set_active_display_count = smu_set_display_count,
+   .set_min_deep_sleep_dcefclk   = smu_set_deep_sleep_dcefclk,
+   .get_asic_baco_capability = smu_get_baco_capability,
+   .set_asic_baco_state  = smu_baco_set_state,
+   .get_ppfeature_status = smu_sys_get_pp_feature_mask,
+   .set_ppfeature_status = smu_sys_set_pp_feature_mask,
+   .asic_reset_mode_2= smu_mode2_reset,
+   .set_df_cstate= smu_set_df_cstate,
+   .set_xgmi_pstate  = smu_set_xgmi_pstate,
+   .get_gpu_metrics  = smu_sys_get_gpu_metrics,
.set_watermarks_for_clock_ranges = 
smu_set_watermarks_for_clock_ranges,
.display_disable_memory_clock_switch = 
smu_display_disable_memory_clock_switch,
.get_max_sustainable_clocks_by_dc= 
smu_get_max_sustainable_clocks_by_dc,
-   .load_firmware   = smu_load_microcode,
-   .gfx_state_change_set= smu_gfx_state_change_set,
+   .get_uclk_dpm_states  = smu_get_uclk_dpm_states,
+   .get_dpm_clock_table  = smu_get_dpm_clock_table,
.get_smu_prv_buf_details = smu_get_prv_buffer_details,
 };
 
-- 
2.25.1


[PATCH 2/5] amdgpu/pm: simplify logic of smu_get_power_level

2021-05-19 Thread Darren Powell
 new powerplay enumeration pp_power_limit_level
 modify hwmon show_power functions to use pp_power_limit_level
 remove insertion of byte field into smu_get_power_level output arg "limit"
 modify smu_get_power_level to use pp_power_limit_level
 simplify logic of smu_get_power_level
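
The level selection is small enough to show in one place; a sketch of how the
hwmon sensor index maps onto the new enum before a single
smu_get_power_limit() call (helper name is illustrative):

	static enum pp_power_limit_level pick_level(int limit_type, bool max)
	{
		/* limit_type: 0 = long window (power1), 1 = fast window (power2) */
		if (max)
			return limit_type ? PP_PWR_LIMIT_FAST_MAX : PP_PWR_LIMIT_MAX;
		return limit_type ? PP_PWR_LIMIT_FAST_CURRENT : PP_PWR_LIMIT_CURRENT;
	}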

* Test
 AMDGPU_PCI_ADDR=`lspci -nn | grep "VGA\|Display" | cut -d " " -f 1`
 AMDGPU_HWMON=`ls -la /sys/class/hwmon | grep $AMDGPU_PCI_ADDR | cut -d " " -f 
11`
 HWMON_DIR=/sys/class/hwmon/${AMDGPU_HWMON}

 lspci -nn | grep "VGA\|Display" ; \
 echo "=== power1 cap ===" ; cat $HWMON_DIR/power1_cap ;   \
 echo "=== power1 cap max ===" ; cat $HWMON_DIR/power1_cap_max ;   \
 echo "=== power1 cap def ===" ; cat $HWMON_DIR/power1_cap_default

* Test (VANGOGH only)
 echo "=== power2 cap ===" ; cat $HWMON_DIR/power2_cap ;   \
 echo "=== power2 cap max ===" ; cat $HWMON_DIR/power2_cap_max ;   \
 echo "=== power2 cap def ===" ; cat $HWMON_DIR/power2_cap_default

Signed-off-by: Darren Powell 
---
 .../gpu/drm/amd/include/kgd_pp_interface.h| 11 +
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 18 ---
 drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h   |  2 +-
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 47 ++-
 4 files changed, 59 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index e2d13131a432..cf98b9afb362 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -192,6 +192,17 @@ enum pp_df_cstate {
DF_CSTATE_ALLOW,
 };
 
+enum pp_power_limit_level
+{
+   PP_PWR_LIMIT_MIN = -1,
+   PP_PWR_LIMIT_CURRENT,
+   PP_PWR_LIMIT_DEFAULT,
+   PP_PWR_LIMIT_MAX,
+   PP_PWR_LIMIT_FAST_CURRENT,
+   PP_PWR_LIMIT_FAST_DEFAULT,
+   PP_PWR_LIMIT_FAST_MAX,
+};
+
 #define PP_GROUP_MASK0xF000
 #define PP_GROUP_SHIFT   28
 
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 13da377888d2..bd5af70ac739 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2718,7 +2718,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct 
device *dev,
struct amdgpu_device *adev = dev_get_drvdata(dev);
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
int limit_type = to_sensor_dev_attr(attr)->index;
-   uint32_t limit = limit_type << 24;
+   uint32_t limit;
+   enum pp_power_limit_level limit_level;
uint32_t max_limit = 0;
ssize_t size;
int r;
@@ -2734,8 +2735,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct 
device *dev,
return r;
}
 
+   limit_level = (limit_type) ? PP_PWR_LIMIT_FAST_MAX : PP_PWR_LIMIT_MAX;
if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit, SMU_PPT_LIMIT_MAX);
+   smu_get_power_limit(&adev->smu, &limit, limit_level);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else if (pp_funcs && pp_funcs->get_power_limit) {
pp_funcs->get_power_limit(adev->powerplay.pp_handle,
@@ -2758,7 +2760,8 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device 
*dev,
struct amdgpu_device *adev = dev_get_drvdata(dev);
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
int limit_type = to_sensor_dev_attr(attr)->index;
-   uint32_t limit = limit_type << 24;
+   uint32_t limit;
+   enum pp_power_limit_level limit_level;
ssize_t size;
int r;
 
@@ -2773,8 +2776,9 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device 
*dev,
return r;
}
 
+   limit_level = (limit_type) ? PP_PWR_LIMIT_FAST_CURRENT : 
PP_PWR_LIMIT_CURRENT;
if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit, SMU_PPT_LIMIT_CURRENT);
+   smu_get_power_limit(&adev->smu, &limit, limit_level);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else if (pp_funcs && pp_funcs->get_power_limit) {
pp_funcs->get_power_limit(adev->powerplay.pp_handle,
@@ -2797,7 +2801,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_default(struct 
device *dev,
struct amdgpu_device *adev = dev_get_drvdata(dev);
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
int limit_type = to_sensor_dev_attr(attr)->index;
-   uint32_t limit = limit_type << 24;
+   uint32_t limit;
+   enum pp_power_limit_level limit_level;
ssize_t size;
int r;
 
@@ -2812,8 +2817,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_default(struct 
device *dev,
return r;
}
 
+   limit_level = (limit_type) ? PP_PWR_LIMIT_FAST_DEFAULT : 
PP_PWR_LIMIT_DEFAULT;
if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit, SMU_PPT_LIMIT_DEFAULT);
+   

[PATCH 0/5] Modify smu_get_power_limit to implement Powerplay API

2021-05-19 Thread Darren Powell
=== Description ===
Modify smu_get_power_limit to implement Powerplay API

=== Test System ===
* DESKTOP(AMD FX-8350 + NAVI10(731F/ca), BIOS: F2)
 + ISO(Ubuntu 20.04.1 LTS)
 + Kernel(5.11.0-custom-fdoagd5f)

=== Patch Summary ===
   linux: (git@gitlab.freedesktop.org:agd5f) origin/amd-staging-drm-next @ 
b1d634be9673
+ 538c6ba2ec30 amdgpu/pm: reorder definition of swsmu_pm_funcs for 
readability
+ 7b3ff20b1454 amdgpu/pm: simplify logic of smu_get_power_level
+ 72f426c7d850 amdgpu/pm: modify Powerplay API get_power_limit to use 
pp_power_limit_level
+ e6618a44993a amdgpu/pm: modify smu_get_power_limit to implement Powerplay 
API
+ 16fb37b834e4 amdgpu/pm: add kernel documentation for smu_get_power_limit


=== Tests ===
 get_power_limit Test 
* Test 
 AMDGPU_PCI_ADDR=`lspci -nn | grep "VGA\|Display" | cut -d " " -f 1`
 AMDGPU_HWMON=`ls -la /sys/class/hwmon | grep $AMDGPU_PCI_ADDR | cut -d " " -f 
10`
 HWMON_DIR=/sys/class/hwmon/${AMDGPU_HWMON}

 lspci -nn | grep "VGA\|Display" ; \
 echo "=== power1 cap ===" ; cat $HWMON_DIR/power1_cap ;   \
 echo "=== power1 cap max ===" ; cat $HWMON_DIR/power1_cap_max ;   \
 echo "=== power1 cap def ===" ; cat $HWMON_DIR/power1_cap_default

 Documentation Test 
* Insert temp documentation
** Documentation/gpu/amdgpu.rst
 vi Documentation/gpu/amdgpu.rst
** added text to start
START
Test Documentation
==

smu_get_power_limit
---
.. kernel-doc:: drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
   :identifiers: smu_get_power_limit

.. kernel-doc:: drivers/gpu/drm/amd/include/kgd_pp_interface.h
   :identifiers: pp_power_limit_level
-END-

* Setup
 cd ~/workspace/linux
 . sphinx_2.4.4/bin/activate

* Build
 export SPHINXDOCLOG=sphinx.build.log
 cp $SPHINXDOCLOG{,.old}
 time make -j 8 htmldocs |& tee $SPHINXDOCLOG

* View
 firefox 
file:///home/dapowell/workspace/linux/Documentation/output/gpu/amdgpu.html

Darren Powell (5):
  amdgpu/pm: reorder definition of swsmu_pm_funcs for readability
  amdgpu/pm: simplify logic of smu_get_power_level
  amdgpu/pm: modify Powerplay API get_power_limit to use
pp_power_limit_level
  amdgpu/pm: modify smu_get_power_limit to implement Powerplay API
  amdgpu/pm: add kernel documentation for smu_get_power_limit

 .../gpu/drm/amd/include/kgd_pp_interface.h|  36 +-
 drivers/gpu/drm/amd/pm/amdgpu_pm.c|  61 +-
 drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h   |   5 +-
 .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  |  29 +++--
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 110 --
 5 files changed, 159 insertions(+), 82 deletions(-)


base-commit: b1d634be967396d371e620685658224f28ba6247
-- 
2.25.1



[PATCH 4/5] amdgpu/pm: modify smu_get_power_limit to implement Powerplay API

2021-05-19 Thread Darren Powell
 modify smu_get_power_limit to match Powerplay .get_power_limit signature
 add smu_get_power_limit to swsmu_pm_funcs
 simplify calling functions to use Powerplay API rather than direct call
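
After this change every sysfs reader follows one pattern; a condensed sketch
(it mirrors the hwmon hunks below; the wrapper name is illustrative):

	static int example_read_limit(struct amdgpu_device *adev, uint32_t *limit,
				      enum pp_power_limit_level level)
	{
		const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;

		if (!pp_funcs || !pp_funcs->get_power_limit)
			return -ENODATA;

		return pp_funcs->get_power_limit(adev->powerplay.pp_handle,
						 limit, level);
	}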

* Test
 AMDGPU_PCI_ADDR=`lspci -nn | grep "VGA\|Display" | cut -d " " -f 1`
 AMDGPU_HWMON=`ls -la /sys/class/hwmon | grep $AMDGPU_PCI_ADDR | cut -d " " -f 
11`
 HWMON_DIR=/sys/class/hwmon/${AMDGPU_HWMON}

 lspci -nn | grep "VGA\|Display" ; \
 echo "=== power1 cap ===" ; cat $HWMON_DIR/power1_cap ;   \
 echo "=== power1 cap max ===" ; cat $HWMON_DIR/power1_cap_max ;   \
 echo "=== power1 cap def ===" ; cat $HWMON_DIR/power1_cap_default

* Test (VANGOGH only)
 echo "=== power2 cap ===" ; cat $HWMON_DIR/power2_cap ;   \
 echo "=== power2 cap max ===" ; cat $HWMON_DIR/power2_cap_max ;   \
 echo "=== power2 cap def ===" ; cat $HWMON_DIR/power2_cap_default

Signed-off-by: Darren Powell 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 48 +++
 drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h   |  5 +--
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  5 ++-
 3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 6130318dd993..1e8f9e8c13a2 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2735,16 +2735,16 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct 
device *dev,
}
 
limit_level = (limit_type) ? PP_PWR_LIMIT_FAST_MAX : PP_PWR_LIMIT_MAX;
-   if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit, limit_level);
-   size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else if (pp_funcs && pp_funcs->get_power_limit) {
-   pp_funcs->get_power_limit(adev->powerplay.pp_handle,
-   &limit, PP_PWR_LIMIT_MAX);
+   if (pp_funcs && pp_funcs->get_power_limit)
+   r = pp_funcs->get_power_limit(adev->powerplay.pp_handle,
+   &limit, limit_level);
+   else
+   r = -ENODATA;
+
+   if (!r)
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else {
+   else
size = snprintf(buf, PAGE_SIZE, "\n");
-   }
 
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -2776,16 +2776,16 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct 
device *dev,
}
 
limit_level = (limit_type) ? PP_PWR_LIMIT_FAST_CURRENT : 
PP_PWR_LIMIT_CURRENT;
-   if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit, limit_level);
-   size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else if (pp_funcs && pp_funcs->get_power_limit) {
-   pp_funcs->get_power_limit(adev->powerplay.pp_handle,
-   &limit, PP_PWR_LIMIT_CURRENT);
+   if (pp_funcs && pp_funcs->get_power_limit)
+   r = pp_funcs->get_power_limit(adev->powerplay.pp_handle,
+   &limit, limit_level);
+   else
+   r = -ENODATA;
+
+   if (!r)
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else {
+   else
size = snprintf(buf, PAGE_SIZE, "\n");
-   }
 
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -2817,16 +2817,16 @@ static ssize_t 
amdgpu_hwmon_show_power_cap_default(struct device *dev,
}
 
limit_level = (limit_type) ? PP_PWR_LIMIT_FAST_DEFAULT : 
PP_PWR_LIMIT_DEFAULT;
-   if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit, limit_level);
-   size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else if (pp_funcs && pp_funcs->get_power_limit) {
-   pp_funcs->get_power_limit(adev->powerplay.pp_handle,
-   &limit, PP_PWR_LIMIT_DEFAULT);
+   if (pp_funcs && pp_funcs->get_power_limit)
+   r = pp_funcs->get_power_limit(adev->powerplay.pp_handle,
+   &limit, limit_level);
+   else
+   r = -ENODATA;
+
+   if (!r)
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else {
+   else
size = snprintf(buf, PAGE_SIZE, "\n");
-   }
 
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
index 6bdd112d64cb..3fbc5f7bf048 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
@@ -1260,9 +1260,8 @@ enum smu_cmn2asic_mapping_type {
[profile] = {1, (workload)}
 
 #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && 
!defined(SWSMU_CODE_LAYER_L4)
-int smu_get_power_limit(struct 

[PATCH 3/5] amdgpu/pm: modify Powerplay API get_power_limit to use pp_power_limit_level

2021-05-19 Thread Darren Powell
 modify Powerplay API get_power_limit to use pp_power_limit_level
 update Powerplay API get_power_limit calls to use pp_power_limit_level
 modify pp_get_power_limit to use new Powerplay API
 add new error return to pp_get_power_limit for unhandled pp_power_limit_level
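
On the powerplay side each level then maps onto hwmgr state; a hedged sketch
of the handler core (field names assumed from pp_hwmgr; the default branch is
the "new error return" mentioned above):

	switch (pwr_limit_level) {
	case PP_PWR_LIMIT_CURRENT:
		*limit = hwmgr->power_limit;
		break;
	case PP_PWR_LIMIT_DEFAULT:
		*limit = hwmgr->default_power_limit;
		break;
	default:
		/* e.g. the FAST (short window) levels on legacy hardware */
		ret = -EOPNOTSUPP;
		break;
	}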

* Test (non smu)
 AMDGPU_PCI_ADDR=`lspci -nn | grep "VGA\|Display" | cut -d " " -f 1`
 AMDGPU_HWMON=`ls -la /sys/class/hwmon | grep $AMDGPU_PCI_ADDR | cut -d " " -f 
11`
 HWMON_DIR=/sys/class/hwmon/${AMDGPU_HWMON}

 lspci -nn | grep "VGA\|Display" ; \
 echo "=== power1 cap ===" ; cat $HWMON_DIR/power1_cap ;   \
 echo "=== power1 cap max ===" ; cat $HWMON_DIR/power1_cap_max ;   \
 echo "=== power1 cap def ===" ; cat $HWMON_DIR/power1_cap_default

Signed-off-by: Darren Powell 
---
 .../gpu/drm/amd/include/kgd_pp_interface.h|  4 +--
 drivers/gpu/drm/amd/pm/amdgpu_pm.c|  9 +++---
 .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 29 ---
 3 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index cf98b9afb362..606c89eb206f 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -302,8 +302,8 @@ struct amd_pm_funcs {
uint32_t block_type, bool gate);
int (*set_clockgating_by_smu)(void *handle, uint32_t msg_id);
int (*set_power_limit)(void *handle, uint32_t n);
-   int (*get_power_limit)(void *handle, uint32_t *limit, uint32_t 
*max_limit,
-   bool default_limit);
+   int (*get_power_limit)(void *handle, uint32_t *limit,
+  enum pp_power_limit_level pwr_limit_level);
int (*get_power_profile_mode)(void *handle, char *buf);
int (*set_power_profile_mode)(void *handle, long *input, uint32_t size);
int (*set_fine_grain_clk_vol)(void *handle, uint32_t type, long *input, 
uint32_t size);
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index bd5af70ac739..6130318dd993 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2720,7 +2720,6 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct 
device *dev,
int limit_type = to_sensor_dev_attr(attr)->index;
uint32_t limit;
enum pp_power_limit_level limit_level;
-   uint32_t max_limit = 0;
ssize_t size;
int r;
 
@@ -2741,8 +2740,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct 
device *dev,
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else if (pp_funcs && pp_funcs->get_power_limit) {
pp_funcs->get_power_limit(adev->powerplay.pp_handle,
-   &limit, &max_limit, true);
-   size = snprintf(buf, PAGE_SIZE, "%u\n", max_limit * 100);
+   &limit, PP_PWR_LIMIT_MAX);
+   size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else {
size = snprintf(buf, PAGE_SIZE, "\n");
}
@@ -2782,7 +2781,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device 
*dev,
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else if (pp_funcs && pp_funcs->get_power_limit) {
pp_funcs->get_power_limit(adev->powerplay.pp_handle,
-   &limit, NULL, false);
+   &limit, PP_PWR_LIMIT_CURRENT);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else {
size = snprintf(buf, PAGE_SIZE, "\n");
@@ -2823,7 +2822,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_default(struct 
device *dev,
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else if (pp_funcs && pp_funcs->get_power_limit) {
pp_funcs->get_power_limit(adev->powerplay.pp_handle,
-   &limit, NULL, true);
+   &limit, PP_PWR_LIMIT_DEFAULT);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else {
size = snprintf(buf, PAGE_SIZE, "\n");
diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c 
b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index c73504e998e5..833e2d3f8f41 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -1035,31 +1035,38 @@ static int pp_set_power_limit(void *handle, uint32_t 
limit)
 }
 
 static int pp_get_power_limit(void *handle, uint32_t *limit,
-   uint32_t *max_limit, bool default_limit)
+ enum pp_power_limit_level pwr_limit_level)
 {
struct pp_hwmgr *hwmgr = handle;
+   int ret = 0;
 
if (!hwmgr || !hwmgr->pm_en ||!limit)
return -EINVAL;
 
mutex_lock(&hwmgr->smu_lock);
 
-   if (default_limit) {
-   *limit = 

[PATCH 5/5] amdgpu/pm: add kernel documentation for smu_get_power_limit

2021-05-19 Thread Darren Powell
Test:
* Temporary insertion into Documentation/gpu/amdgpu.rst
START
Test Documentation
==

smu_get_power_limit
---
.. kernel-doc:: drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
   :identifiers: smu_get_power_limit

.. kernel-doc:: drivers/gpu/drm/amd/include/kgd_pp_interface.h
   :identifiers: pp_power_limit_level
-END-

Signed-off-by: Darren Powell 
---
 .../gpu/drm/amd/include/kgd_pp_interface.h| 21 +++
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 +
 2 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 606c89eb206f..e86eecdf6a76 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -192,6 +192,27 @@ enum pp_df_cstate {
DF_CSTATE_ALLOW,
 };
 
+/**
+ * enum pp_power_limit_level - Used to query the power limits
+ *
+ * APU power is managed to system-level requirements through the PPT
+ * (package power tracking) feature. PPT is intended to limit power to the
+ * requirements of the power source and could be dynamically updated to
+ * maximize APU performance within the system power budget.
+ *
+ * PP_PWR_LIMIT_* manages the configurable, thermally significant
+ * moving average of APU power (default ~5000 ms).
+ * PP_PWR_LIMIT_FAST_* manages the ~10 ms moving average of APU power,
+ * where supported.
+ *
+ * @PP_PWR_LIMIT_MIN: Minimum Power Limit
+ * @PP_PWR_LIMIT_CURRENT: Current Power Limit (Long Window)
+ * @PP_PWR_LIMIT_DEFAULT: Default Power Limit (Long Window)
+ * @PP_PWR_LIMIT_MAX: Maximum Power Limit (Long Window)
+ * @PP_PWR_LIMIT_FAST_CURRENT:  Current Power Limit (Short Window)
+ * @PP_PWR_LIMIT_FAST_DEFAULT: Default Power Limit (Short Window)
+ * @PP_PWR_LIMIT_FAST_MAX: Maximum Power Limit (Short Window)
+ */
 enum pp_power_limit_level
 {
PP_PWR_LIMIT_MIN = -1,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 2815e932580b..57f416732265 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2166,6 +2166,16 @@ static int smu_set_fan_speed_rpm(void *handle, uint32_t 
speed)
return ret;
 }
 
+/**
+ * smu_get_power_limit - Request one of the SMU Power Limits
+ *
+ * @handle: pointer to smu context
+ * @limit: requested limit is written back to this variable
+ * @pwr_limit_level: _power_limit_level which power limit to return
+ *
+ * Return:  0 on success, <0 on error
+ *
+ */
 int smu_get_power_limit(void *handle, uint32_t *limit,
enum pp_power_limit_level pwr_limit_level)
 {
-- 
2.25.1



Re: [PATCH v5 08/10] drm/amdgpu: Modify MMHUB register access from MMIO to RLCG in file mmhub_v2*

2021-05-19 Thread Alex Deucher
On Mon, May 17, 2021 at 10:39 AM Peng Ju Zhou  wrote:
>
> From: pengzhou 
>
> In an SRIOV environment, KMD should access GC registers
> with RLCG if the GC indirect access flag is enabled.
>
> Signed-off-by: pengzhou 

Patches 1-8 are:
Reviewed-by: Alex Deucher 
See my comments on patch 9.

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 37 +
>  1 file changed, 19 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c 
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
> index ac76081b91d5..e24225b3d42a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
> @@ -29,6 +29,7 @@
>  #include "mmhub/mmhub_2_0_0_default.h"
>  #include "navi10_enum.h"
>
> +#include "gc/gc_10_1_0_offset.h"
>  #include "soc15_common.h"
>
>  #define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid  0x064d
> @@ -165,11 +166,11 @@ static void mmhub_v2_0_setup_vm_pt_regs(struct 
> amdgpu_device *adev, uint32_t vmi
>  {
> struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
>
> -   WREG32_SOC15_OFFSET(MMHUB, 0, 
> mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
> +   WREG32_SOC15_OFFSET_RLC(MMHUB, 0, 
> mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
> hub->ctx_addr_distance * vmid,
> lower_32_bits(page_table_base));
>
> -   WREG32_SOC15_OFFSET(MMHUB, 0, 
> mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
> +   WREG32_SOC15_OFFSET_RLC(MMHUB, 0, 
> mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
> hub->ctx_addr_distance * vmid,
> upper_32_bits(page_table_base));
>  }
> @@ -180,14 +181,14 @@ static void mmhub_v2_0_init_gart_aperture_regs(struct 
> amdgpu_device *adev)
>
> mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
>
> -   WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
> +   WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
>  (u32)(adev->gmc.gart_start >> 12));
> -   WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
> +   WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
>  (u32)(adev->gmc.gart_start >> 44));
>
> -   WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
> +   WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
>  (u32)(adev->gmc.gart_end >> 12));
> -   WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
> +   WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
>  (u32)(adev->gmc.gart_end >> 44));
>  }
>
> @@ -197,9 +198,9 @@ static void mmhub_v2_0_init_system_aperture_regs(struct 
> amdgpu_device *adev)
> uint32_t tmp;
>
> /* Program the AGP BAR */
> -   WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
> -   WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
> -   WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
> +   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
> +   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 
> 24);
> +   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 
> 24);
>
> if (!amdgpu_sriov_vf(adev)) {
> /* Program the system aperture low logical page number. */
> @@ -308,7 +309,7 @@ static void mmhub_v2_0_enable_system_domain(struct 
> amdgpu_device *adev)
> tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
> tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
> RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
> -   WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp);
> +   WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp);
>  }
>
>  static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
> @@ -370,16 +371,16 @@ static void mmhub_v2_0_setup_vmid_config(struct 
> amdgpu_device *adev)
> tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
> RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
> !adev->gmc.noretry);
> -   WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL,
> +   WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_CNTL,
> i * hub->ctx_distance, tmp);
> -   WREG32_SOC15_OFFSET(MMHUB, 0, 
> mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
> +   WREG32_SOC15_OFFSET_RLC(MMHUB, 0, 
> mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
> i * hub->ctx_addr_distance, 0);
> -   WREG32_SOC15_OFFSET(MMHUB, 0, 
> mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
> +   WREG32_SOC15_OFFSET_RLC(MMHUB, 0, 
> mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
> i * hub->ctx_addr_distance, 0);
> - 

Re: [PATCH v5 10/10] drm/amdgpu: Skip the program of MMMC_VM_AGP_* in SRIOV

2021-05-19 Thread Alex Deucher
On Mon, May 17, 2021 at 10:39 AM Peng Ju Zhou  wrote:
>
> KMD should not program these registers; the values are
> defined by the host, so skip them in the SRIOV environment.
>
> Signed-off-by: Peng Ju Zhou 

Reviewed-by: Alex Deucher 
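
The pattern being applied, distilled from the hunk below: the AGP aperture
registers are programmed on bare metal only, since the host already set them
up for a VF.

	if (!amdgpu_sriov_vf(adev)) {
		/* bare metal: the guest KMD owns the AGP aperture */
		WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
		WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BOT,
				 adev->gmc.agp_start >> 24);
		WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_TOP,
				 adev->gmc.agp_end >> 24);
	}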

> ---
>  drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c 
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
> index e24225b3d42a..422d106a650b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
> @@ -197,12 +197,12 @@ static void mmhub_v2_0_init_system_aperture_regs(struct 
> amdgpu_device *adev)
> uint64_t value;
> uint32_t tmp;
>
> -   /* Program the AGP BAR */
> -   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
> -   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 
> 24);
> -   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 
> 24);
> -
> if (!amdgpu_sriov_vf(adev)) {
> +   /* Program the AGP BAR */
> +   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
> +   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BOT, 
> adev->gmc.agp_start >> 24);
> +   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_TOP, 
> adev->gmc.agp_end >> 24);
> +
> /* Program the system aperture low logical page number. */
> WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
>  min(adev->gmc.fb_start, adev->gmc.agp_start) >> 
> 18);
> --
> 2.17.1
>


Re: [PATCH v5 09/10] drm/amdgpu: Use PSP to program IH_RB_CNTL* registers

2021-05-19 Thread Alex Deucher
On Mon, May 17, 2021 at 10:39 AM Peng Ju Zhou  wrote:
>
> Use PSP to program IH_RB_CNTL* if indirect access
> for IH is enabled in the SRIOV environment.
>
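
[The fallback shape, condensed from the hunks below: try PSP first when the
VF cannot write IH_RB_CNTL directly, otherwise fall back to plain MMIO.]

	if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
		if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
			return -ETIMEDOUT;	/* PSP refused or timed out */
	} else {
		WREG32(ih_regs->ih_rb_cntl, tmp);
	}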
> Signed-off-by: Victor 
> Signed-off-by: Peng Ju Zhou 
> ---
>  drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 19 +--
>  drivers/gpu/drm/amd/amdgpu/nv.c|  2 +-
>  2 files changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c 
> b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> index f4e4040bbd25..2e69cf8db072 100644
> --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> @@ -151,7 +151,14 @@ static int navi10_ih_toggle_ring_interrupts(struct 
> amdgpu_device *adev,
> /* enable_intr field is only valid in ring0 */
> if (ih == &adev->irq.ih)
> tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 
> : 0));
> -   WREG32(ih_regs->ih_rb_cntl, tmp);
> +   if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
> +   if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
> +   DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
> +   return -ETIMEDOUT;
> +   }
> +   } else {
> +   WREG32(ih_regs->ih_rb_cntl, tmp);
> +   }
>
> if (enable) {
> ih->enabled = true;
> @@ -261,7 +268,15 @@ static int navi10_ih_enable_ring(struct amdgpu_device 
> *adev,
> tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
> tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
> }
> -   WREG32(ih_regs->ih_rb_cntl, tmp);
> +
> +   if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
> +   if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
> +   DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
> +   return -ETIMEDOUT;
> +   }
> +   } else {
> +   WREG32(ih_regs->ih_rb_cntl, tmp);
> +   }
>
> if (ih == &adev->irq.ih) {
> /* set the ih ring 0 writeback address whether it's enabled 
> or not */
> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
> index a9ad28fb55b3..b9c9c4d4606c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -845,8 +845,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
> case CHIP_NAVI12:
> amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
> amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
> -   amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
> amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
> +   amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);

Is it safe to change the order like this on bare metal?  Please look
at what was done for vega and sienna cichlid.  Something like that is
probably a better bet.

Alex


> if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
> amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
> if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
> --
> 2.17.1
>


Re: [PATCH] drm/amdgpu: Let userptr BO ttm have TTM_PAGE_FLAG_SG set

2021-05-19 Thread Felix Kuehling
I think this works for KFD userptr BOs. But this problem is probably not
specific to KFD. It's only most obvious with KFD because we rely so
heavily on userptrs.

I don't really understand why we're messing with TTM_PAGE_FLAG_SG in
amdgpu_ttm_tt_populate and amdgpu_ttm_tt_unpopulate. And why are userptr
BOs created as ttm_bo_type_device, not ttm_bo_type_sg? Christian, do you
know about the history of this code?

Either way, the patch is

Acked-by: Felix Kuehling 

Thanks for looking into this!

Regards,
  Felix

On 2021-05-19 at 11:15 p.m., xinhui pan wrote:
> We have met memory corruption due to unexpected swapout/swapin.
>
> The swapout function creates one swap storage which is filled with zeros, and
> sets ttm->page_flags as TTM_PAGE_FLAG_SWAPPED. But because the userptr BO ttm
> has no backend pages at that time, no real data is swapped out to the swap
> storage.
>
> The swapin function is called during userptr BO populate because
> TTM_PAGE_FLAG_SWAPPED is set. Now here is the problem: we swap data into
> ttm backend memory from the swap storage. That just causes the memory to be
> overwritten.
>
> CPU 1                                      CPU 2
> kfd alloc BO A(userptr)                    alloc BO B(GTT)
>  -> init -> validate(create ttm)            -> init -> validate -> populate
>  init_user_pages                            -> swapout BO A
>  -> get_user_pages (fill up ttm->pages)
>  -> validate -> populate
>  -> swapin BO A // memory overwritten
>
> To fix this issue, we can set TTM_PAGE_FLAG_SG when we create userptr BO
> ttm. Then the swapout function will not swap it.
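
[Sketch of the guard this relies on: TTM's swap path bails out early for
SG-backed ttms, so a userptr ttm created with the flag never reaches the
zero-filled swap storage described above. Simplified; the real check lives
in the TTM swap code.]

	if (ttm->page_flags & TTM_PAGE_FLAG_SG)
		return 0;	/* nothing to swap for SG-backed ttms */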
>
> Signed-off-by: xinhui pan 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 +---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 4 
>  2 files changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 928e8d57cd08..9a6ea966ddb2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1410,7 +1410,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>   domain = AMDGPU_GEM_DOMAIN_GTT;
>   alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
> - alloc_flags = 0;
> + alloc_flags = AMDGPU_AMDKFD_CREATE_USERPTR_BO;
>   if (!offset || !*offset)
>   return -EINVAL;
>   user_addr = untagged_addr(*offset);
> @@ -1477,8 +1477,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   }
>   bo->kfd_bo = *mem;
>   (*mem)->bo = bo;
> - if (user_addr)
> - bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
>  
>   (*mem)->va = va;
>   (*mem)->domain = domain;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c7f5cc503601..5b3f45637fb5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1119,6 +1119,10 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct 
> ttm_buffer_object *bo,
>   kfree(gtt);
>   return NULL;
>   }
> +
> + if (abo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO)
> + gtt->ttm.page_flags |= TTM_PAGE_FLAG_SG;
> +
> + return &gtt->ttm;
>  }
>  


[PATCH] add vbios info query

2021-05-19 Thread Jiawei Gu
Signed-off-by: Jiawei Gu 
---
 src/app/CMakeLists.txt |  1 +
 src/app/main.c |  8 +
 src/app/vbios.c| 53 ++
 src/lib/lowlevel/linux/query_drm.c | 11 +++
 src/umr.h  | 11 +++
 src/umrapp.h   |  1 +
 6 files changed, 85 insertions(+)
 create mode 100644 src/app/vbios.c

diff --git a/src/app/CMakeLists.txt b/src/app/CMakeLists.txt
index ca7d46b..462e4fc 100644
--- a/src/app/CMakeLists.txt
+++ b/src/app/CMakeLists.txt
@@ -35,6 +35,7 @@ add_library(umrapp STATIC
   pp_table.c
   navi10_ppt.c
   read_metrics.c
+  vbios.c
   ${GUI_SOURCE}
 )
 
diff --git a/src/app/main.c b/src/app/main.c
index 47ddb38..b484cf3 100644
--- a/src/app/main.c
+++ b/src/app/main.c
@@ -825,6 +825,11 @@ int main(int argc, char **argv)
asic = get_asic();
ih_self_test(asic);
 #endif
+   } else if (!strcmp(argv[i], "--vbios_info") || !strcmp(argv[i], 
"-vi")) {
+   if (!asic)
+   asic = get_asic();
+   if (umr_print_vbios_info(asic) != 0)
fprintf(stderr, "[ERROR]: Cannot print vbios info.\n");
} else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], 
"-h")) {
printf("User Mode Register debugger v%s for AMDGPU 
devices (build: %s [%s]), Copyright (c) 2021, AMD Inc.\n"
 "\n*** Device Selection ***\n"
@@ -951,6 +956,9 @@ printf(
"\n\t\tPrint the GPU metrics table for the device."
 "\n\t--power, -p \n\t\tRead the conetent of clocks, temperature, gpu loading 
at runtime"
"\n\t\toptions 'use_colour' to colourize output \n");
+printf(
+"\n*** Video BIOS Information ***\n"
+"\n\t--vbios_info, -vi \n\t\tPrint Video BIOS information\n");
 
 #if UMR_GUI
 printf(
diff --git a/src/app/vbios.c b/src/app/vbios.c
new file mode 100644
index 000..fa0a3a3
--- /dev/null
+++ b/src/app/vbios.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis 
+ *
+ */
+#include "umrapp.h"
+
+#define AMDGPU_INFO_VBIOS  0x1B
+#define AMDGPU_INFO_VBIOS_INFO 0x3
+int umr_print_vbios_info(struct umr_asic *asic)
+{
+   char fname[64];
+   int r;
+   struct umr_vbios_info vbios_info;
+
+   if (asic->fd.drm < 0) {
+   snprintf(fname, sizeof(fname)-1, "/dev/dri/card%d", 
asic->instance);
+   asic->fd.drm = open(fname, O_RDWR);
+   }
+
+   r = umr_query_drm_vbios(asic, AMDGPU_INFO_VBIOS, AMDGPU_INFO_VBIOS_INFO,
+   &vbios_info, sizeof(vbios_info));
+   if (r)
+   return r;
+
+   printf("vbios name  : %s\n", vbios_info.name);
+   printf("vbios pn: %s\n", vbios_info.vbios_pn);
+   printf("vbios version   : %d\n", vbios_info.version);
+   printf("vbios ver_str   : %s\n", vbios_info.vbios_ver_str);
+   printf("vbios date  : %s\n", vbios_info.date);
+
+   close(asic->fd.drm);
+   return 0;
+}
\ No newline at end of file
diff --git a/src/lib/lowlevel/linux/query_drm.c 
b/src/lib/lowlevel/linux/query_drm.c
index d0c82d4..f4ab709 100644
--- a/src/lib/lowlevel/linux/query_drm.c
+++ b/src/lib/lowlevel/linux/query_drm.c
@@ -49,7 +49,18 @@ int umr_query_drm(struct umr_asic *asic, int field, void 
*ret, int size)
inf.return_size = size;
inf.query = field;
return ioctl(asic->fd.drm, DRM_IOC(DRM_IOC_WRITE, DRM_IOCTL_BASE, 
DRM_COMMAND_BASE + DRM_AMDGPU_INFO, sizeof(inf)), );
+}
 
+int umr_query_drm_vbios(struct umr_asic *asic, int field, int type, void *ret, int size)
+{
+   struct drm_amdgpu_info inf;
+
+   memset(&inf, 0, sizeof inf);
+   inf.return_pointer = (uintptr_t)ret;
+   inf.return_size = size;
+  

Re: [PATCH] drm/amdgpu: Add early fini callback

2021-05-19 Thread Felix Kuehling
On 2021-05-19 at 11:20 p.m., Andrey Grodzovsky wrote:
> Use it to call display code dependent on device->drv_data
> before it's set to NULL on device unplug
>
> v5:
> Move HW finalization into this callback to prevent MMIO accesses
> post PCI remove.
>
> v7:
> Split kfd suspend from device exit to expedite HW related
> stuff to amdgpu_pci_remove
>
> v8:
> Squash previous KFD commit into this commit to avoid compile break.
>
> Signed-off-by: Andrey Grodzovsky 
> Acked-by: Christian König 

See one cosmetic comment inline. With that fixed the patch is

Reviewed-by: Felix Kuehling 


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 59 +--
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  3 +-
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++-
>  drivers/gpu/drm/amd/include/amd_shared.h  |  2 +
>  6 files changed, 56 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 5f6696a3c778..2b06dee9a0ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -170,7 +170,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>   }
>  }
>  
> -void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
> +void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
>  {
>   if (adev->kfd.dev) {
>   kgd2kfd_device_exit(adev->kfd.dev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 5ffb07b02810..d8a537e8aea5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -127,7 +127,7 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
>   const void *ih_ring_entry);
>  void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
>  void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
> -void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
> +void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
>  int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
>   uint32_t vmid, uint64_t gpu_addr,
>   uint32_t *ib_cmd, uint32_t ib_len);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 8bee95ad32d9..bc75e35dd8d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2558,34 +2558,26 @@ static int amdgpu_device_ip_late_init(struct 
> amdgpu_device *adev)
>   return 0;
>  }
>  
> -/**
> - * amdgpu_device_ip_fini - run fini for hardware IPs
> - *
> - * @adev: amdgpu_device pointer
> - *
> - * Main teardown pass for hardware IPs.  The list of all the hardware
> - * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
> - * are run.  hw_fini tears down the hardware associated with each IP
> - * and sw_fini tears down any software state associated with each IP.
> - * Returns 0 on success, negative error code on failure.
> - */
> -static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
> +static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
>  {
>   int i, r;
>  
> - if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
> - amdgpu_virt_release_ras_err_handler_data(adev);
> + for (i = 0; i < adev->num_ip_blocks; i++) {
> + if (!adev->ip_blocks[i].version->funcs->early_fini)
> + continue;
>  
> - amdgpu_ras_pre_fini(adev);
> + r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
> + if (r) {
> + DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
> +   adev->ip_blocks[i].version->funcs->name, r);
> + }
> + }
>  
> - if (adev->gmc.xgmi.num_physical_nodes > 1)
> - amdgpu_xgmi_remove_device(adev);
> + amdgpu_amdkfd_suspend(adev, false);
>  
>   amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
>   amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
>  
> - amdgpu_amdkfd_device_fini(adev);
> -
>   /* need to disable SMC first */
>   for (i = 0; i < adev->num_ip_blocks; i++) {
>   if (!adev->ip_blocks[i].status.hw)
> @@ -2616,6 +2608,33 @@ static int amdgpu_device_ip_fini(struct amdgpu_device 
> *adev)
>   adev->ip_blocks[i].status.hw = false;
>   }
>  
> + return 0;
> +}
> +
> +/**
> + * amdgpu_device_ip_fini - run fini for hardware IPs
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Main teardown pass for hardware IPs.  The list of all the hardware
> + * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
> + * are run.  hw_fini tears down the 

Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-05-19 Thread Alex Deucher
On Wed, May 19, 2021 at 10:59 PM Jiawei Gu  wrote:
>
> Add AMDGPU_INFO_VBIOS_INFO subquery id for detailed vbios info.
>
> Provides a way for the user application to get the VBIOS
> information without having to parse the binary.
> It is useful for the user to be able to display in a simple way the VBIOS
> version in their system if they happen to encounter an issue.
>
> V2:
> Use numeric serial.
> Parse and expose vbios version string.
>
> V3:
> Remove redundant data in drm_amdgpu_info_vbios struct.
>
> V4:
> 64 bit alignment in drm_amdgpu_info_vbios.
>
> Signed-off-by: Jiawei Gu 

Assuming you send out the updated umr patch,
Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  15 ++
>  drivers/gpu/drm/amd/amdgpu/atom.c  | 172 +
>  drivers/gpu/drm/amd/amdgpu/atom.h  |  10 ++
>  drivers/gpu/drm/amd/include/atomfirmware.h |   5 +
>  include/uapi/drm/amdgpu_drm.h  |  11 ++
>  5 files changed, 213 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 8d12e474745a..524e4fe5efe8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -861,6 +861,21 @@ int amdgpu_info_ioctl(struct drm_device *dev, void 
> *data, struct drm_file *filp)
> min((size_t)size, 
> (size_t)(bios_size - bios_offset)))
> ? -EFAULT : 0;
> }
> +   case AMDGPU_INFO_VBIOS_INFO: {
> +   struct drm_amdgpu_info_vbios vbios_info = {};
> +   struct atom_context *atom_context;
> +
> +   atom_context = adev->mode_info.atom_context;
> +   memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name));
> +   memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn));
> +   vbios_info.version = atom_context->version;
> +   memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
> +   sizeof(atom_context->vbios_ver_str));
> +   memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date));
> +
> +   return copy_to_user(out, &vbios_info,
> +   min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;
> +   }
> default:
> DRM_DEBUG_KMS("Invalid request %d\n",
> info->vbios_info.type);
> diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c 
> b/drivers/gpu/drm/amd/amdgpu/atom.c
> index 3dcb8b32f48b..6fa2229b7229 100644
> --- a/drivers/gpu/drm/amd/amdgpu/atom.c
> +++ b/drivers/gpu/drm/amd/amdgpu/atom.c
> @@ -31,6 +31,7 @@
>
>  #define ATOM_DEBUG
>
> +#include "atomfirmware.h"
>  #include "atom.h"
>  #include "atom-names.h"
>  #include "atom-bits.h"
> @@ -1299,12 +1300,168 @@ static void atom_index_iio(struct atom_context *ctx, 
> int base)
> }
>  }
>
> +static void atom_get_vbios_name(struct atom_context *ctx)
> +{
> +   unsigned char *p_rom;
> +   unsigned char str_num;
> +   unsigned short off_to_vbios_str;
> +   unsigned char *c_ptr;
> +   int name_size;
> +   int i;
> +
> +   const char *na = "--N/A--";
> +   char *back;
> +
> +   p_rom = ctx->bios;
> +
> +   str_num = *(p_rom + OFFSET_TO_GET_ATOMBIOS_NUMBER_OF_STRINGS);
> +   if (str_num != 0) {
> +   off_to_vbios_str =
> +   *(unsigned short *)(p_rom + OFFSET_TO_GET_ATOMBIOS_STRING_START);
> +
> +   c_ptr = (unsigned char *)(p_rom + off_to_vbios_str);
> +   } else {
> +   /* do not know where to find name */
> +   memcpy(ctx->name, na, 7);
> +   ctx->name[7] = 0;
> +   return;
> +   }
> +
> +   /*
> +* skip the atombios strings, usually 4
> +* 1st is P/N, 2nd is ASIC, 3rd is PCI type, 4th is Memory type
> +*/
> +   for (i = 0; i < str_num; i++) {
> +   while (*c_ptr != 0)
> +   c_ptr++;
> +   c_ptr++;
> +   }
> +
> +   /* skip the following 2 chars: 0x0D 0x0A */
> +   c_ptr += 2;
> +
> +   name_size = strnlen(c_ptr, STRLEN_LONG - 1);
> +   memcpy(ctx->name, c_ptr, name_size);
> +   back = ctx->name + name_size;
> +   while ((*--back) == ' ')
> +   ;
> +   *(back + 1) = '\0';
> +}
> +
> +static void atom_get_vbios_date(struct atom_context *ctx)
> +{
> +   unsigned char *p_rom;
> +   unsigned char *date_in_rom;
> +
> +   p_rom = ctx->bios;
> +
> +   date_in_rom = p_rom + OFFSET_TO_VBIOS_DATE;
> +
> +   ctx->date[0] = '2';
> +   ctx->date[1] = '0';
> +   ctx->date[2] = date_in_rom[6];
> +   

[PATCH] drm/amdgpu: Add early fini callback

2021-05-19 Thread Andrey Grodzovsky
Use it to call display code dependent on device->drv_data
before it's set to NULL on device unplug

v5:
Move HW finalization into this callback to prevent MMIO accesses
post PCI remove.

v7:
Split kfd suspend from device exit to expedite HW related
stuff to amdgpu_pci_remove

v8:
Squash previous KFD commit into this commit to avoid compile break.

Signed-off-by: Andrey Grodzovsky 
Acked-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 59 +--
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  3 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++-
 drivers/gpu/drm/amd/include/amd_shared.h  |  2 +
 6 files changed, 56 insertions(+), 24 deletions(-)
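
For illustration, an IP block's hook might look like the sketch below
(example_ip_early_fini is a made-up name; the void-pointer signature
follows how amdgpu_device_ip_fini_early() in this patch invokes the new
early_fini callback):

static int example_ip_early_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* Tear down anything that still dereferences driver-private data
	 * (e.g. display state reached through drv_data) while it is valid.
	 */
	(void)adev;
	return 0;
}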

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 5f6696a3c778..2b06dee9a0ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -170,7 +170,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
}
 }
 
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
 {
if (adev->kfd.dev) {
kgd2kfd_device_exit(adev->kfd.dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 5ffb07b02810..d8a537e8aea5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -127,7 +127,7 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry);
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8bee95ad32d9..bc75e35dd8d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2558,34 +2558,26 @@ static int amdgpu_device_ip_late_init(struct 
amdgpu_device *adev)
return 0;
 }
 
-/**
- * amdgpu_device_ip_fini - run fini for hardware IPs
- *
- * @adev: amdgpu_device pointer
- *
- * Main teardown pass for hardware IPs.  The list of all the hardware
- * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
- * are run.  hw_fini tears down the hardware associated with each IP
- * and sw_fini tears down any software state associated with each IP.
- * Returns 0 on success, negative error code on failure.
- */
-static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
+static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
 {
int i, r;
 
-   if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
-   amdgpu_virt_release_ras_err_handler_data(adev);
+   for (i = 0; i < adev->num_ip_blocks; i++) {
+   if (!adev->ip_blocks[i].version->funcs->early_fini)
+   continue;
 
-   amdgpu_ras_pre_fini(adev);
+   r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
+   if (r) {
+   DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+   }
+   }
 
-   if (adev->gmc.xgmi.num_physical_nodes > 1)
-   amdgpu_xgmi_remove_device(adev);
+   amdgpu_amdkfd_suspend(adev, false);
 
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 
-   amdgpu_amdkfd_device_fini(adev);
-
/* need to disable SMC first */
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.hw)
@@ -2616,6 +2608,33 @@ static int amdgpu_device_ip_fini(struct amdgpu_device 
*adev)
adev->ip_blocks[i].status.hw = false;
}
 
+   return 0;
+}
+
+/**
+ * amdgpu_device_ip_fini - run fini for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main teardown pass for hardware IPs.  The list of all the hardware
+ * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
+ * are run.  hw_fini tears down the hardware associated with each IP
+ * and sw_fini tears down any software state associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
+{
+   int i, r;
+
+   if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
+

[PATCH] drm/amdgpu: Let userptr BO ttm have TTM_PAGE_FLAG_SG set

2021-05-19 Thread xinhui pan
We have met memory corruption due to unexpected swapout/swapin.

The swapout function creates a swap storage which is filled with zeros, and
sets ttm->page_flags to TTM_PAGE_FLAG_SWAPPED. But because the userptr BO
ttm has no backend pages at that time, no real data is swapped out to the
swap storage.

The swapin function is called during userptr BO populate because
TTM_PAGE_FLAG_SWAPPED is set. Now here is the problem: we swap in data to
the ttm backend memory from the swap storage. That just overwrites the
memory.

CPU 1   CPU 2
kfd alloc BO A(userptr) alloc BO B(GTT)
->init -> validate(create ttm)  -> init -> validate -> populate
init_user_pages   -> swapout BO A
-> get_user_pages (fill up ttm->pages)
 -> validate -> populate
  -> swapin BO A // memory overwritten

To fix this issue, we can set TTM_PAGE_FLAG_SG when we create the userptr
BO ttm. Then the swapout function will not swap it.
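
To illustrate the intended effect, here is a simplified sketch (not the
actual TTM code) of the guard this flag enables in the swap path:

/* Simplified sketch: a ttm_tt carrying TTM_PAGE_FLAG_SG owns no backend
 * pages of its own, so the swap path must leave it alone.
 */
static int swapout_sketch(struct ttm_tt *ttm)
{
	if (ttm->page_flags & TTM_PAGE_FLAG_SG)
		return 0;	/* nothing to swap out for an SG-backed ttm */

	/* ... normal path: create swap storage and copy the pages out ... */
	return 0;
}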

Signed-off-by: xinhui pan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 4 
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 928e8d57cd08..9a6ea966ddb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1410,7 +1410,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
-   alloc_flags = 0;
+   alloc_flags = AMDGPU_AMDKFD_CREATE_USERPTR_BO;
if (!offset || !*offset)
return -EINVAL;
user_addr = untagged_addr(*offset);
@@ -1477,8 +1477,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
}
bo->kfd_bo = *mem;
(*mem)->bo = bo;
-   if (user_addr)
-   bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
 
(*mem)->va = va;
(*mem)->domain = domain;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c7f5cc503601..5b3f45637fb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1119,6 +1119,10 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct 
ttm_buffer_object *bo,
kfree(gtt);
return NULL;
}
+
+   if (abo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO)
+   gtt->ttm.page_flags |= TTM_PAGE_FLAG_SG;
+
	return &gtt->ttm;
 }
 
-- 
2.25.1



[pull] amdgpu, amdkfd, radeon drm-next-5.14

2021-05-19 Thread Alex Deucher
Hi Dave, Daniel,

New stuff for 5.14, same as last week, but with fixed up fixes tag.

The following changes since commit af8352f1ff54c4fecf84e36315fd1928809a580b:

  Merge tag 'drm-msm-next-2021-04-11' of https://gitlab.freedesktop.org/drm/msm 
into drm-next (2021-04-13 23:35:54 +0200)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-next-5.14-2021-05-19

for you to fetch changes up to 2bb5b5f688cbbd5030629905d3ed8032ab46e79f:

  drm/radeon/dpm: Disable sclk switching on Oland when two 4K 60Hz monitors are 
connected (2021-05-19 22:29:40 -0400)


amd-drm-next-5.14-2021-05-19:

amdgpu:
- Aldebaran updates
- More LTTPR display work
- Vangogh updates
- SDMA 5.x GCR fixes
- RAS fixes
- PCIe ASPM support
- Modifier fixes
- Enable TMZ on Renoir
- Buffer object code cleanup
- Display overlay fixes
- Initial support for multiple eDP panels
- Initial SR-IOV support for Aldebaran
- DP link training refactor
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- MAINTAINERS fixes for amdgpu

amdkfd:
- Initial SR-IOV support for Aldebaran
- Topology fixes
- Initial HMM SVM support
- Misc code cleanups and bug fixes

radeon:
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- Flickering fix for Oland with multiple 4K displays

UAPI:
- amdgpu: Drop AMDGPU_GEM_CREATE_SHADOW flag.
  This was always a kernel internal flag and userspace use of it has always 
been blocked.
  It's no longer needed so remove it.
- amdkgd: HMM SVM support
  Overview: https://patchwork.freedesktop.org/series/85562/
  Proposed userspace: 
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip


Alex Deucher (12):
  drm/amdgpu/gmc9: remove dummy read workaround for newer chips
  drm/amdgpu/display: add documentation for dmcub_trace_event_en
  MAINTAINERS: fix a few more amdgpu tree links
  drm/amdgpu: Add graphics cache rinse packet for sdma 5.0
  drm/amdgpu: drop the GCR packet from the emit_ib frame for sdma5.0
  drm/amdgpu: change the default timeout for kernel compute queues
  drm/amdgpu/pm: add documentation for pp_od_clock_voltage for APUs
  drm/amdgpu/pm: add documentation for pp_od_clock_voltage for vangogh
  drm/amdgpu/display: remove an old DCN3 guard
  drm/amdgpu/display: fix warning when CONFIG_DRM_AMD_DC_DCN is not defined
  drm/amdgpu/display: fix build when CONFIG_DRM_AMD_DC_DCN is not defined
  drm/amdgpu/display: fix dal_allocation documentation

Alex Sierra (12):
  drm/amdkfd: helper to convert gpu id and idx
  drm/amdkfd: add xnack enabled flag to kfd_process
  drm/amdkfd: add ioctl to configure and query xnack retries
  drm/amdgpu: enable 48-bit IH timestamp counter
  drm/amdkfd: SVM API call to restore page tables
  drm/amdkfd: add svm_bo reference for eviction fence
  drm/amdgpu: add param bit flag to create SVM BOs
  drm/amdgpu: svm bo enable_signal call condition
  drm/amdgpu: add svm_bo eviction to enable_signal cb
  drm/amdgpu: extend xnack limit page fault timeout
  drm/amdkfd: svm ranges creation for unregistered memory
  drm/amdkfd: set attribute access for default ranges

Anthony Koo (6):
  drm/amd/display: [FW Promotion] Release 0.0.60
  drm/amd/display: [FW Promotion] Release 0.0.61
  drm/amd/display: [FW Promotion] Release 0.0.62
  drm/amd/display: [FW Promotion] Release 0.0.63
  drm/amd/display: [FW Promotion] Release 0.0.64
  drm/amd/display: [FW Promotion] Release 0.0.65

Anthony Wang (4):
  drm/amd/display: Force vsync flip when reconfiguring MPCC
  drm/amd/display: Add DSC check to seamless boot validation
  drm/amd/display: disable seamless boot for external DP
  drm/amd/display: Handle potential dpp_inst mismatch with pipe_idx

Aric Cyr (8):
  drm/amd/display: 3.2.131
  drm/amd/display: Fix FreeSync when RGB MPO in use
  drm/amd/display: 3.2.132
  drm/amd/display: 3.2.133
  drm/amdgpu/dc: Revert commit "treat memory as a single-channel"
  drm/amd/display: 3.2.134
  drm/amd/display: 3.2.135
  drm/amd/display: 3.2.135.1

Bas Nieuwenhuizen (2):
  drm/amdgpu: Init GFX10_ADDR_CONFIG for VCN v3 in DPG mode.
  drm/amdgpu: Use device specific BO size & stride check.

Bing Guo (1):
  drm/amd/display: add helper for enabling mst stream features

Brandon Syu (1):
  drm/amd/display: fix HDCP reset sequence on reinitialize

Calvin Hou (1):
  drm/amd/display: remove checking sink in is_timing_changed

Chaitanya Dhere (1):
  drm/amd/display: DETBufferSizeInKbyte variable type modifications

Chris Park (1):
  drm/amd/display: Fix BSOD with NULL check

Christian König (4):
  drm/amdgpu: fix coding style and documentation in amdgpu_gtt_mgr.c
  drm/amdgpu: 

[PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-05-19 Thread Jiawei Gu
Add AMDGPU_INFO_VBIOS_INFO subquery id for detailed vbios info.

Provides a way for the user application to get the VBIOS
information without having to parse the binary.
It is useful for the user to be able to display in a simple way the VBIOS
version in their system if they happen to encounter an issue.

V2:
Use numeric serial.
Parse and expose vbios version string.

V3:
Remove redundant data in drm_amdgpu_info_vbios struct.

V4:
64 bit alignment in drm_amdgpu_info_vbios.

Signed-off-by: Jiawei Gu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  15 ++
 drivers/gpu/drm/amd/amdgpu/atom.c  | 172 +
 drivers/gpu/drm/amd/amdgpu/atom.h  |  10 ++
 drivers/gpu/drm/amd/include/atomfirmware.h |   5 +
 include/uapi/drm/amdgpu_drm.h  |  11 ++
 5 files changed, 213 insertions(+)
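
For reference, a minimal userspace sketch of the query (assuming the
AMDGPU_INFO_VBIOS_INFO subquery added here and the field names of the
proposed drm_amdgpu_info_vbios; error handling trimmed, header paths may
vary with the libdrm setup):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>	/* path may differ, e.g. libdrm/amdgpu_drm.h */

static int print_vbios_info(int drm_fd)
{
	struct drm_amdgpu_info_vbios vbios;
	struct drm_amdgpu_info request;

	memset(&vbios, 0, sizeof(vbios));
	memset(&request, 0, sizeof(request));
	request.return_pointer = (uintptr_t)&vbios;
	request.return_size = sizeof(vbios);
	request.query = AMDGPU_INFO_VBIOS;
	request.vbios_info.type = AMDGPU_INFO_VBIOS_INFO;

	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_INFO, &request))
		return -1;

	printf("vbios name    : %s\n", (char *)vbios.name);
	printf("vbios version : %u (%s)\n", vbios.version,
	       (char *)vbios.vbios_ver_str);
	return 0;
}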

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 8d12e474745a..524e4fe5efe8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -861,6 +861,21 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
min((size_t)size, 
(size_t)(bios_size - bios_offset)))
? -EFAULT : 0;
}
+   case AMDGPU_INFO_VBIOS_INFO: {
+   struct drm_amdgpu_info_vbios vbios_info = {};
+   struct atom_context *atom_context;
+
+   atom_context = adev->mode_info.atom_context;
+   memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name));
+   memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn));
+   vbios_info.version = atom_context->version;
+   memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
+   sizeof(atom_context->vbios_ver_str));
+   memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date));
+
+   return copy_to_user(out, &vbios_info,
+   min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;
+   }
default:
DRM_DEBUG_KMS("Invalid request %d\n",
info->vbios_info.type);
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c 
b/drivers/gpu/drm/amd/amdgpu/atom.c
index 3dcb8b32f48b..6fa2229b7229 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -31,6 +31,7 @@
 
 #define ATOM_DEBUG
 
+#include "atomfirmware.h"
 #include "atom.h"
 #include "atom-names.h"
 #include "atom-bits.h"
@@ -1299,12 +1300,168 @@ static void atom_index_iio(struct atom_context *ctx, 
int base)
}
 }
 
+static void atom_get_vbios_name(struct atom_context *ctx)
+{
+   unsigned char *p_rom;
+   unsigned char str_num;
+   unsigned short off_to_vbios_str;
+   unsigned char *c_ptr;
+   int name_size;
+   int i;
+
+   const char *na = "--N/A--";
+   char *back;
+
+   p_rom = ctx->bios;
+
+   str_num = *(p_rom + OFFSET_TO_GET_ATOMBIOS_NUMBER_OF_STRINGS);
+   if (str_num != 0) {
+   off_to_vbios_str =
+   *(unsigned short *)(p_rom + OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+   c_ptr = (unsigned char *)(p_rom + off_to_vbios_str);
+   } else {
+   /* do not know where to find name */
+   memcpy(ctx->name, na, 7);
+   ctx->name[7] = 0;
+   return;
+   }
+
+   /*
+* skip the atombios strings, usually 4
+* 1st is P/N, 2nd is ASIC, 3rd is PCI type, 4th is Memory type
+*/
+   for (i = 0; i < str_num; i++) {
+   while (*c_ptr != 0)
+   c_ptr++;
+   c_ptr++;
+   }
+
+   /* skip the following 2 chars: 0x0D 0x0A */
+   c_ptr += 2;
+
+   name_size = strnlen(c_ptr, STRLEN_LONG - 1);
+   memcpy(ctx->name, c_ptr, name_size);
+   back = ctx->name + name_size;
+   while ((*--back) == ' ')
+   ;
+   *(back + 1) = '\0';
+}
+
+static void atom_get_vbios_date(struct atom_context *ctx)
+{
+   unsigned char *p_rom;
+   unsigned char *date_in_rom;
+
+   p_rom = ctx->bios;
+
+   date_in_rom = p_rom + OFFSET_TO_VBIOS_DATE;
+
+   ctx->date[0] = '2';
+   ctx->date[1] = '0';
+   ctx->date[2] = date_in_rom[6];
+   ctx->date[3] = date_in_rom[7];
+   ctx->date[4] = '/';
+   ctx->date[5] = date_in_rom[0];
+   ctx->date[6] = date_in_rom[1];
+   ctx->date[7] = '/';
+   ctx->date[8] = date_in_rom[3];
+   ctx->date[9] = date_in_rom[4];
+   ctx->date[10] = ' ';
+   ctx->date[11] = date_in_rom[9];
+   ctx->date[12] = date_in_rom[10];
+   ctx->date[13] = date_in_rom[11];
+

Re: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to swapout and swapin

2021-05-19 Thread Pan, Xinhui
[AMD Official Use Only]

I am not sure if we can create a ttm_bo_type_sg BO for userptr. But I have 
another idea now: we can use the flag AMDGPU_AMDKFD_CREATE_USERPTR_BO to create 
the userptr BO.

From: Kuehling, Felix 
Sent: May 19, 2021 23:11
To: Christian König; Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander; dan...@ffwll.ch; Koenig, Christian; 
dri-de...@lists.freedesktop.org
Subject: Re: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to swapout 
and swapin

Looks like we're creating the userptr BO as ttm_bo_type_device. I guess
we should be using ttm_bo_type_sg? BTW, amdgpu_gem_userptr_ioctl also
uses ttm_bo_type_device.

Regards,
  Felix


On 2021-05-19 at 6:01 a.m., Christian König wrote:
> I'm scratching my head how that is even possible.
>
> See when a BO is created in the system domain it is just an empty
> hull, e.g. without backing store and allocated pages.
>
> So the swapout function will just ignore it.
>
> Christian.
>
> On 19.05.21 at 07:07, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> I have reverted Chris' patch, still hit this failure.
>> Just see two lines in Chris' patch. Any BO in the CPU domain would be
>> swapped out first. That is why we hit this issue frequently now. But the
>> bug has been there a long time.
>>
>> -   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> -   list_for_each_entry(bo, &glob->swap_lru[i], swap) {
>> [snip]
>> +   for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>> +   for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
>>
>>
>> 
>> From: Pan, Xinhui 
>> Sent: May 19, 2021 12:09
>> To: Kuehling, Felix; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander; Koenig, Christian;
>> dri-de...@lists.freedesktop.org; dan...@ffwll.ch
>> Subject: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to
>> swapout and swapin
>>
>> Yes, we really don't swap out SG BOs.
>> The problem is that before we validate a userptr BO, we create this
>> BO in the CPU domain by default. So this BO has a chance to be swapped out.
>>
>> We set the flag TTM_PAGE_FLAG_SG on the userptr BO in populate(), which is
>> too late.
>> I have not tried to revert Chris' patch as I think it doesn't help. Or I
>> can have a try later.
>>
>> 
>> From: Kuehling, Felix 
>> Sent: May 19, 2021 11:29
>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander; Koenig, Christian;
>> dri-de...@lists.freedesktop.org; dan...@ffwll.ch
>> Subject: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to
>> swapout and swapin
>>
>> Swapping SG BOs makes no sense, because TTM doesn't own the pages of
>> this type of BO.
>>
>> Last I checked, userptr BOs (and other SG BOs) were protected from
>> swapout by the fact that they would not be added to the swap-LRU. But it
>> looks like Christian just removed the swap-LRU. I guess this broke that
>> protection:
>>
>> commit 2cb51d22d70b18eaf339abf9758bf0b7608da65c
>> Author: Christian König 
>> Date:   Tue Oct 6 16:30:09 2020 +0200
>>
>>   drm/ttm: remove swap LRU v3
>>
>>   Instead evict round robin from each devices SYSTEM and TT domain.
>>
>>   v2: reorder num_pages access reported by Dan's script
>>   v3: fix rebase fallout, num_pages should be 32bit
>>
>>   Signed-off-by: Christian König 
>>   Tested-by: Nirmoy Das 
>>   Reviewed-by: Huang Rui 
>>   Reviewed-by: Matthew Auld 
>>   Link: https://patchwork.freedesktop.org/patch/424009/
>>
>> Regards,
>> Felix
>>
>>
>> On 2021-05-18 10:28 p.m., xinhui pan wrote:
>>> cpu 1   cpu 2
>>> kfd alloc BO A(userptr) alloc BO B(GTT)
>>>   ->init -> validate   -> init ->
>>> validate -> populate
>>>   init_user_pages-> swapout BO A
>>> //hit ttm pages limit
>>>-> get_user_pages (fill up ttm->pages)
>>> -> validate -> populate
>>> -> swapin BO A // Now hit the BUG
>>>
>>> We know that get_user_pages may race with swapout on same BO.
>>> There are some issues I have met.
>>> 1) memory corruption.
>>> This is because we do a swap before memory is setup. ttm_tt_swapout()
>>> just create a swap_storage with its content being 0x0. So when we setup
>>> memory after the swapout. The following swapin makes the memory
>>> corrupted.
>>>
>>> 2) panic
>>> When swapout happens with get_user_pages, they touch ttm->pages without
>>> any lock. It causes memory corruption too. But I hit page faults mostly.
>>>
>>> Signed-off-by: xinhui pan 
>>> ---
>>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16
>>> +++-
>>>1 file changed, 15 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index 928e8d57cd08..42460e4480f8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Re: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to swapout and swapin

2021-05-19 Thread Pan, Xinhui
[AMD Official Use Only]

The swapout function creates a swap storage which is filled with zeros, and sets 
ttm->page_flags to TTM_PAGE_FLAG_SWAPPED. Just because the ttm has no backend pages 
at this time, no real data is swapped out to this swap storage.

The swapin function is called during populate as TTM_PAGE_FLAG_SWAPPED is set.
Now here is the problem: we swap in data to the ttm backend memory from the swap 
storage. That just overwrites the memory.


From: Christian König 
Sent: May 19, 2021 18:01
To: Pan, Xinhui; Kuehling, Felix; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander; dan...@ffwll.ch; Koenig, Christian; 
dri-de...@lists.freedesktop.org
Subject: Re: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to swapout 
and swapin

I'm scratching my head how that is even possible.

See when a BO is created in the system domain it is just an empty hull,
e.g. without backing store and allocated pages.

So the swapout function will just ignore it.

Christian.

On 19.05.21 at 07:07, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> I have reverted Chris'  patch, still hit this failure.
> Just see two lines in Chris' patch. Any BO in the CPU domain would be swapped out 
> first. That is why we hit this issue frequently now. But the bug has been there 
> a long time.
>
> -   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -   list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> [snip]
> +   for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> +   for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
>
>
> 
> From: Pan, Xinhui 
> Sent: May 19, 2021 12:09
> To: Kuehling, Felix; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander; Koenig, Christian; dri-de...@lists.freedesktop.org; 
> dan...@ffwll.ch
> Subject: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to swapout and 
> swapin
>
> Yes, we really don't swap out SG BOs.
> The problem is that before we validate a userptr BO, we create this BO in the 
> CPU domain by default. So this BO has a chance to be swapped out.
>
> We set the flag TTM_PAGE_FLAG_SG on the userptr BO in populate(), which is too late.
> I have not tried to revert Chris' patch as I think it doesn't help. Or I can have 
> a try later.
>
> 
> From: Kuehling, Felix 
> Sent: May 19, 2021 11:29
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander; Koenig, Christian; dri-de...@lists.freedesktop.org; 
> dan...@ffwll.ch
> Subject: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to swapout and 
> swapin
>
> Swapping SG BOs makes no sense, because TTM doesn't own the pages of
> this type of BO.
>
> Last I checked, userptr BOs (and other SG BOs) were protected from
> swapout by the fact that they would not be added to the swap-LRU. But it
> looks like Christian just removed the swap-LRU. I guess this broke that
> protection:
>
> commit 2cb51d22d70b18eaf339abf9758bf0b7608da65c
> Author: Christian König 
> Date:   Tue Oct 6 16:30:09 2020 +0200
>
>   drm/ttm: remove swap LRU v3
>
>   Instead evict round robin from each devices SYSTEM and TT domain.
>
>   v2: reorder num_pages access reported by Dan's script
>   v3: fix rebase fallout, num_pages should be 32bit
>
>   Signed-off-by: Christian König 
>   Tested-by: Nirmoy Das 
>   Reviewed-by: Huang Rui 
>   Reviewed-by: Matthew Auld 
>   Link: 
> https://patchwork.freedesktop.org/patch/424009/
>
> Regards,
> Felix
>
>
> On 2021-05-18 10:28 p.m., xinhui pan wrote:
>> cpu 1   cpu 2
>> kfd alloc BO A(userptr) alloc BO B(GTT)
>>   ->init -> validate   -> init -> validate 
>> -> populate
>>   init_user_pages-> swapout BO A //hit ttm 
>> pages limit
>>-> get_user_pages (fill up ttm->pages)
>> -> validate -> populate
>> -> swapin BO A // Now hit the BUG
>>
>> We know that get_user_pages may race with swapout on same BO.
>> There are some issues I have met.
>> 1) memory corruption.
>> This is because we do a swap before memory is setup. ttm_tt_swapout()
>> just create a swap_storage with its content being 0x0. So when we setup
>> memory after the swapout. The following swapin makes the memory
>> corrupted.
>>
>> 2) panic
>> When swapout happens with get_user_pages, they touch ttm->pages without
>> any lock. It causes memory corruption too. But I hit page faults mostly.
>>
>> Signed-off-by: xinhui pan 
>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 +++-
>>1 file 

[pull] radeon, amdgpu drm-fixes-5.13

2021-05-19 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 5.13.

The following changes since commit d07f6ca923ea0927a1024dfccafc5b53b61cfecc:

  Linux 5.13-rc2 (2021-05-16 15:27:44 -0700)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-5.13-2021-05-19

for you to fetch changes up to a2b4785f01280a4291edb9fda69032fc2e4bfd3f:

  drm/amdgpu: stop touching sched.ready in the backend (2021-05-19 18:07:43 
-0400)


amd-drm-fixes-5.13-2021-05-19:

amdgpu:
- Fix downscaling ratio on DCN3.x
- Fix for non-4K pages
- PCO/RV compute hang fix
- Dongle fix
- Aldebaran codec query support
- Refcount leak fix
- Use after free fix
- Navi12 golden settings updates
- GPU reset fixes

radeon:
- Fix for imported BO handling


Changfeng (1):
  drm/amdgpu: disable 3DCGCG on picasso/raven1 to avoid compute hang

Chris Park (1):
  drm/amd/display: Disconnect non-DP with no EDID

Christian König (2):
  drm/radeon: use the dummy page for GART if needed
  drm/amdgpu: stop touching sched.ready in the backend

Guchun Chen (2):
  drm/amdgpu: update gc golden setting for Navi12
  drm/amdgpu: update sdma golden setting for Navi12

James Zhu (1):
  drm/amdgpu: add video_codecs query support for aldebaran

Jingwen Chen (1):
  drm/amd/amdgpu: fix refcount leak

Lang Yu (1):
  drm/amd/amdgpu: fix a potential deadlock in gpu reset

Nikola Cornij (1):
  drm/amd/display: Use the correct max downscaling value for DCN3.x family

Yi Li (1):
  drm/amdgpu: Fix GPU TLB update error when PAGE_SIZE > AMDGPU_PAGE_SIZE

xinhui pan (1):
  drm/amdgpu: Fix a use-after-free

 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|  3 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c |  6 --
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 10 +++---
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c |  2 --
 drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c |  2 --
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c |  4 
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c |  5 -
 drivers/gpu/drm/amd/amdgpu/soc15.c |  3 +--
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c  |  8 +---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c  | 18 ++
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c  |  7 ---
 .../gpu/drm/amd/display/dc/dcn301/dcn301_resource.c|  7 ---
 .../gpu/drm/amd/display/dc/dcn302/dcn302_resource.c|  7 ---
 drivers/gpu/drm/radeon/radeon_gart.c   |  3 ++-
 16 files changed, 54 insertions(+), 35 deletions(-)


RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-05-19 Thread Gu, JiaWei (Will)
[AMD Official Use Only - Internal Distribution Only]

Thanks Christian!

Happy to learn new tricks.

Best regards,
Jiawei

From: Christian König 
Sent: Wednesday, May 19, 2021 9:23 PM
To: Deucher, Alexander ; Gu, JiaWei (Will) 
; Nieto, David M ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org; mar...@gmail.com
Cc: Deng, Emily 
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

Good point.

If you want to double check the alignment you can use something like "pahole 
drivers/gpu/drm/amd/amdgpu/amdgpu.ko -C drm_amdgpu_info_vbios" after building 
the kernel module.

Regards,
Christian.
On 19.05.21 at 15:09, Deucher, Alexander wrote:

[Public]

The structure is not 64 bit aligned.  I think you want something like:

> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u32 pad;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> +};
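
A quick compile-time sanity check on that layout (a sketch mirroring the
padded struct proposed above):

#include <stdint.h>

struct drm_amdgpu_info_vbios {
	uint8_t  name[64];
	uint8_t  vbios_pn[64];
	uint32_t version;
	uint32_t pad;
	uint8_t  vbios_ver_str[32];
	uint8_t  date[32];
};

/* With the explicit pad there is no implicit padding, so the layout is the
 * same for 32-bit and 64-bit userspace and the size is a multiple of 8.
 */
_Static_assert(sizeof(struct drm_amdgpu_info_vbios) == 200, "no hidden padding");
_Static_assert(sizeof(struct drm_amdgpu_info_vbios) % 8 == 0, "64-bit friendly size");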

From: Gu, JiaWei (Will) 
Sent: Tuesday, May 18, 2021 1:58 AM
To: Nieto, David M ; Koenig, 
Christian ; 
amd-gfx@lists.freedesktop.org 
; 
mar...@gmail.com 
; Deucher, Alexander 

Cc: Deng, Emily 
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface


[Public]


Hi all,



Then the struct looks like:



> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> +};



Sample output:



vbios name : NAVI12 A0 XT D30501 8GB EVAL 1150e/334m HYN/SAM
vbios pn : 113-D3050100-104
vbios version : 285409288
vbios ver_str : 017.003.000.008.016956
vbios date : 2021/05/03 23:32

Please help double-confirm that we're all fine with it and there's no need to 
add or remove anything.



Best regards,

Jiawei



From: Nieto, David M 
Sent: Tuesday, May 18, 2021 12:40 PM
To: Gu, JiaWei (Will) ; Koenig, 
Christian ; 
amd-gfx@lists.freedesktop.org; 
mar...@gmail.com; Deucher, Alexander 

Cc: Deng, Emily 
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[Public]



Yes, let's remove that too,



Thanks,



David



From: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>
Sent: Monday, May 17, 2021 8:07 PM
To: Nieto, David M mailto:david.ni...@amd.com>>; Koenig, 
Christian mailto:christian.koe...@amd.com>>; 
amd-gfx@lists.freedesktop.org 
mailto:amd-gfx@lists.freedesktop.org>>; 
mar...@gmail.com 
mailto:mar...@gmail.com>>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]



OK let's remove serial.



dbdf comes from this:

vbios_info.dbdf = PCI_DEVID(adev->pdev->bus->number, adev->pdev->devfn);



I think we can remove dbdf as well.



Best regards,

Jiawei



From: Nieto, David M mailto:david.ni...@amd.com>>
Sent: Tuesday, May 18, 2021 10:45 AM
To: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>; Koenig, 
Christian mailto:christian.koe...@amd.com>>; 
amd-gfx@lists.freedesktop.org; 
mar...@gmail.com; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]



The serial number is ASIC information, not VBIOS information, and it is still 
available as a sysfs node... I don't think we should put it there.



Not sure what dbdf stands for.



From: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>
Sent: Monday, May 17, 2021 7:11 PM
To: Koenig, Christian 
mailto:christian.koe...@amd.com>>; 
amd-gfx@lists.freedesktop.org 
mailto:amd-gfx@lists.freedesktop.org>>; Nieto, 
David M mailto:david.ni...@amd.com>>; 
mar...@gmail.com 
mailto:mar...@gmail.com>>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]

So I guess the dbdf also needs to be removed?
And how about serial?

> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u32 dbdf; // do we need this?
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 

Re: [PATCH] drm/amd/display: take dc_lock in short pulse handler only

2021-05-19 Thread Aurabindo Pillai



On 2021-05-19 4:59 p.m., Kazlauskas, Nicholas wrote:

On 2021-05-19 4:55 p.m., Aurabindo Pillai wrote:

[Why]
Conditions that end up modifying the global dc state must be locked.
However, during mst allocate payload sequence, lock is already taken.
With StarTech 1.2 DP hub, we get an HPD RX interrupt for a reason other
than to indicate down reply availability right after sending payload
allocation. The handler again takes dc lock before calling the
dc's HPD RX handler. Due to this contention, the DRM thread which waits
for MST down reply never gets a chance to finish its waiting
successfully and ends up timing out. Once the lock is released, the hpd
rx handler fires and goes ahead to read from the MST HUB, but now it's
too late and the HUB doesn't light up all displays since DRM lacks error
handling when payload allocation fails.

[How]
Take lock only if there is a change in link status or if automated test
pattern bit is set. The latter fixes the null pointer dereference when
running certain DP Link Layer Compliance test.

Signed-off-by: Aurabindo Pillai 


Discussed this a bit offline and I'd *really* like the proper interface in 
sooner rather than later.

Conditional locking is almost always a sign of a bug, in this case we know it's OK but someone can 
change the function underneath later without understanding that we're duplicating some of the 
checking logic in the upper layer.


I don't think the code changes enough in this area for this to happen (as it's spec based), but 
please be mindful and consider splitting the checking logic (which is thread safe) out with the link 
loss logic (the functional bit, that isn't thread safe).


Reviewed-by: Nicholas Kazlauskas 


Thanks for the review! I'll get back to fixing this properly in a few weeks.


Regards,
Nicholas Kazlauskas


---
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 +--
  .../gpu/drm/amd/display/dc/core/dc_link_dp.c  |  2 +-
  .../gpu/drm/amd/display/dc/inc/dc_link_dp.h   |  4 
  3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

index e79910cc179c..2c9d099adfc2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -28,6 +28,7 @@
  #include "dm_services_types.h"
  #include "dc.h"
+#include "dc_link_dp.h"
  #include "dc/inc/core_types.h"
  #include "dal_asic_id.h"
  #include "dmub/dmub_srv.h"
@@ -2740,6 +2741,7 @@ static void handle_hpd_rx_irq(void *param)
  enum dc_connection_type new_connection_type = dc_connection_none;
  struct amdgpu_device *adev = drm_to_adev(dev);
  union hpd_irq_data hpd_irq_data;
+    bool lock_flag = 0;
  memset(&hpd_irq_data, 0, sizeof(hpd_irq_data));
@@ -2769,15 +2771,28 @@ static void handle_hpd_rx_irq(void *param)
  }
  }
-    if (!amdgpu_in_reset(adev)) {
+    /*
+ * TODO: We need the lock to avoid touching DC state while it's being
+ * modified during automated compliance testing, or when link loss
+ * happens. While this should be split into subhandlers and proper
+ * interfaces to avoid having to conditionally lock like this in the
+ * outer layer, we need this workaround temporarily to allow MST
+ * lightup in some scenarios to avoid timeout.
+ */
+    if (!amdgpu_in_reset(adev) &&
+    (hpd_rx_irq_check_link_loss_status(dc_link, &hpd_irq_data) ||
+     hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST)) {
  mutex_lock(&adev->dm.dc_lock);
+    lock_flag = 1;
+    }
+
  #ifdef CONFIG_DRM_AMD_DC_HDCP
  result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL);
  #else
  result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL);
  #endif
+    if (!amdgpu_in_reset(adev) && lock_flag)
  mutex_unlock(&adev->dm.dc_lock);
-    }
  out:
  if (result && !is_mst_root_connector) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c

index 9e08410bfdfd..32fb9cdbd980 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -2070,7 +2070,7 @@ enum dc_status read_hpd_rx_irq_data(
  return retval;
  }
-static bool hpd_rx_irq_check_link_loss_status(
+bool hpd_rx_irq_check_link_loss_status(
  struct dc_link *link,
  union hpd_irq_data *hpd_irq_dpcd_data)
  {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h 
b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h

index ffc3f2c63db8..7dd8bca542b9 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
@@ -68,6 +68,10 @@ bool perform_link_training_with_retries(
  enum signal_type signal,
  bool do_fallback);
+bool hpd_rx_irq_check_link_loss_status(
+    struct dc_link *link,
+    union hpd_irq_data *hpd_irq_dpcd_data);
+
  bool is_mst_supported(struct dc_link *link);
  bool 

Re: [PATCH] drm/amd/display: take dc_lock in short pulse handler only

2021-05-19 Thread Kazlauskas, Nicholas

On 2021-05-19 4:55 p.m., Aurabindo Pillai wrote:

[Why]
Conditions that end up modifying the global dc state must be locked.
However, during mst allocate payload sequence, lock is already taken.
With StarTech 1.2 DP hub, we get an HPD RX interrupt for a reason other
than to indicate down reply availability right after sending payload
allocation. The handler again takes dc lock before calling the
dc's HPD RX handler. Due to this contention, the DRM thread which waits
for MST down reply never gets a chance to finish its waiting
successfully and ends up timing out. Once the lock is released, the hpd
rx handler fires and goes ahead to read from the MST HUB, but now its
too late and the HUB doesnt lightup all displays since DRM lacks error
handling when payload allocation fails.

[How]
Take lock only if there is a change in link status or if automated test
pattern bit is set. The latter fixes the null pointer dereference when
running certain DP Link Layer Compliance test.

Signed-off-by: Aurabindo Pillai 


Discussed this a bit offline and I'd *really* like the proper interface 
in sooner rather than later.


Conditional locking is almost always a sign of a bug, in this case we 
know it's OK but someone can change the function underneath later 
without understanding that we're duplicating some of the checking logic 
in the upper layer.


I don't think the code changes enough in this area for this to happen 
(as it's spec based), but please be mindful and consider splitting the 
checking logic (which is thread safe) out with the link loss logic (the 
functional bit, that isn't thread safe).
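
As a sketch of that direction (illustrative only; the read-only checks stay 
lock-free, only the state-mutating handling takes dc_lock, and the 
amdgpu_in_reset() guard from the patch is omitted for brevity):

static void handle_hpd_rx_irq_sketch(struct amdgpu_device *adev,
				     struct dc_link *dc_link,
				     union hpd_irq_data *data)
{
	/* thread-safe: only inspects the already-read interrupt data */
	bool touches_dc_state =
		hpd_rx_irq_check_link_loss_status(dc_link, data) ||
		data->bytes.device_service_irq.bits.AUTOMATED_TEST;

	if (!touches_dc_state) {
		dc_link_handle_hpd_rx_irq(dc_link, data, NULL);	/* lock-free path */
		return;
	}

	mutex_lock(&adev->dm.dc_lock);
	dc_link_handle_hpd_rx_irq(dc_link, data, NULL);	/* mutates DC state */
	mutex_unlock(&adev->dm.dc_lock);
}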


Reviewed-by: Nicholas Kazlauskas 

Regards,
Nicholas Kazlauskas


---
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 +--
  .../gpu/drm/amd/display/dc/core/dc_link_dp.c  |  2 +-
  .../gpu/drm/amd/display/dc/inc/dc_link_dp.h   |  4 
  3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e79910cc179c..2c9d099adfc2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -28,6 +28,7 @@
  
  #include "dm_services_types.h"

  #include "dc.h"
+#include "dc_link_dp.h"
  #include "dc/inc/core_types.h"
  #include "dal_asic_id.h"
  #include "dmub/dmub_srv.h"
@@ -2740,6 +2741,7 @@ static void handle_hpd_rx_irq(void *param)
enum dc_connection_type new_connection_type = dc_connection_none;
struct amdgpu_device *adev = drm_to_adev(dev);
union hpd_irq_data hpd_irq_data;
+   bool lock_flag = 0;
  
	memset(&hpd_irq_data, 0, sizeof(hpd_irq_data));
  
@@ -2769,15 +2771,28 @@ static void handle_hpd_rx_irq(void *param)

}
}
  
-	if (!amdgpu_in_reset(adev)) {

+   /*
+* TODO: We need the lock to avoid touching DC state while it's being
+* modified during automated compliance testing, or when link loss
+* happens. While this should be split into subhandlers and proper
+* interfaces to avoid having to conditionally lock like this in the
+* outer layer, we need this workaround temporarily to allow MST
+* lightup in some scenarios to avoid timeout.
+*/
+   if (!amdgpu_in_reset(adev) &&
+   (hpd_rx_irq_check_link_loss_status(dc_link, &hpd_irq_data) ||
+    hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST)) {
	mutex_lock(&adev->dm.dc_lock);
+   lock_flag = 1;
+   }
+
  #ifdef CONFIG_DRM_AMD_DC_HDCP
	result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL);
  #else
	result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL);
  #endif
+   if (!amdgpu_in_reset(adev) && lock_flag)
	mutex_unlock(&adev->dm.dc_lock);
-   }
  
  out:

if (result && !is_mst_root_connector) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 9e08410bfdfd..32fb9cdbd980 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -2070,7 +2070,7 @@ enum dc_status read_hpd_rx_irq_data(
return retval;
  }
  
-static bool hpd_rx_irq_check_link_loss_status(

+bool hpd_rx_irq_check_link_loss_status(
struct dc_link *link,
union hpd_irq_data *hpd_irq_dpcd_data)
  {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h 
b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
index ffc3f2c63db8..7dd8bca542b9 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
@@ -68,6 +68,10 @@ bool perform_link_training_with_retries(
enum signal_type signal,
bool do_fallback);
  
+bool hpd_rx_irq_check_link_loss_status(

+   struct dc_link *link,
+   union hpd_irq_data *hpd_irq_dpcd_data);
+
  bool is_mst_supported(struct dc_link *link);
  
  bool 

[PATCH] drm/amd/display: take dc_lock in short pulse handler only

2021-05-19 Thread Aurabindo Pillai
[Why]
Conditions that end up modifying the global dc state must be locked.
However, during mst allocate payload sequence, lock is already taken.
With StarTech 1.2 DP hub, we get an HPD RX interrupt for a reason other
than to indicate down reply availability right after sending payload
allocation. The handler again takes dc lock before calling the
dc's HPD RX handler. Due to this contention, the DRM thread which waits
for MST down reply never gets a chance to finish its waiting
successfully and ends up timing out. Once the lock is released, the hpd
rx handler fires and goes ahead to read from the MST HUB, but now it's
too late and the HUB doesn't light up all displays since DRM lacks error
handling when payload allocation fails.

[How]
Take lock only if there is a change in link status or if automated test
pattern bit is set. The latter fixes the null pointer dereference when
running certain DP Link Layer Compliance test.

Signed-off-by: Aurabindo Pillai 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 +--
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  |  2 +-
 .../gpu/drm/amd/display/dc/inc/dc_link_dp.h   |  4 
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e79910cc179c..2c9d099adfc2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -28,6 +28,7 @@
 
 #include "dm_services_types.h"
 #include "dc.h"
+#include "dc_link_dp.h"
 #include "dc/inc/core_types.h"
 #include "dal_asic_id.h"
 #include "dmub/dmub_srv.h"
@@ -2740,6 +2741,7 @@ static void handle_hpd_rx_irq(void *param)
enum dc_connection_type new_connection_type = dc_connection_none;
struct amdgpu_device *adev = drm_to_adev(dev);
union hpd_irq_data hpd_irq_data;
+   bool lock_flag = 0;
 
	memset(&hpd_irq_data, 0, sizeof(hpd_irq_data));
 
@@ -2769,15 +2771,28 @@ static void handle_hpd_rx_irq(void *param)
}
}
 
-   if (!amdgpu_in_reset(adev)) {
+   /*
+* TODO: We need the lock to avoid touching DC state while it's being
+* modified during automated compliance testing, or when link loss
+* happens. While this should be split into subhandlers and proper
+* interfaces to avoid having to conditionally lock like this in the
+* outer layer, we need this workaround temporarily to allow MST
+* lightup in some scenarios to avoid timeout.
+*/
+   if (!amdgpu_in_reset(adev) &&
+   (hpd_rx_irq_check_link_loss_status(dc_link, &hpd_irq_data) ||
+    hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST)) {
	mutex_lock(&adev->dm.dc_lock);
+   lock_flag = 1;
+   }
+
 #ifdef CONFIG_DRM_AMD_DC_HDCP
	result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL);
 #else
	result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL);
 #endif
+   if (!amdgpu_in_reset(adev) && lock_flag)
	mutex_unlock(&adev->dm.dc_lock);
-   }
 
 out:
if (result && !is_mst_root_connector) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 9e08410bfdfd..32fb9cdbd980 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -2070,7 +2070,7 @@ enum dc_status read_hpd_rx_irq_data(
return retval;
 }
 
-static bool hpd_rx_irq_check_link_loss_status(
+bool hpd_rx_irq_check_link_loss_status(
struct dc_link *link,
union hpd_irq_data *hpd_irq_dpcd_data)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h 
b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
index ffc3f2c63db8..7dd8bca542b9 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
@@ -68,6 +68,10 @@ bool perform_link_training_with_retries(
enum signal_type signal,
bool do_fallback);
 
+bool hpd_rx_irq_check_link_loss_status(
+   struct dc_link *link,
+   union hpd_irq_data *hpd_irq_dpcd_data);
+
 bool is_mst_supported(struct dc_link *link);
 
 bool detect_dp_sink_caps(struct dc_link *link);
-- 
2.31.1



Re: [PATCH] drm/amd/display: Fix GPU scaling regression by FS video support

2021-05-19 Thread Aurabindo Pillai

Reviewed-by: Aurabindo Pillai 

On 2021-05-19 4:12 p.m., Nicholas Kazlauskas wrote:

[Why]
FS video support regressed GPU scaling and the scaled buffer ends up
stuck in the top left of the screen at native size; full, aspect, and
center scaling modes do not function.

This is because decide_crtc_timing_for_drm_display_mode() does not
get called when scaling is enabled.

[How]
Split recalculate timing and scaling into two different flags.

We don't want to call drm_mode_set_crtcinfo() for scaling, but we
do want to call it for FS video.

Optimize and move preferred_refresh calculation next to
decide_crtc_timing_for_drm_display_mode() like it used to be since
that's not used for FS video.

We don't need to copy over the VIC or polarity in the case of FS video
modes because those don't change.

Fixes: a372f4abec ("drm/amd/display: Skip modeset for front porch change")

Cc: Aurabindo Pillai 
Signed-off-by: Nicholas Kazlauskas 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 +++---
  1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 8cd270f129..759621b0e8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5672,7 +5672,8 @@ create_stream_for_sink(struct amdgpu_dm_connector 
*aconnector,
struct drm_display_mode saved_mode;
struct drm_display_mode *freesync_mode = NULL;
bool native_mode_found = false;
-   bool recalculate_timing = dm_state ? (dm_state->scaling != RMX_OFF) : 
false;
+   bool recalculate_timing = false;
+   bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false;
int mode_refresh;
int preferred_refresh = 0;
  #if defined(CONFIG_DRM_AMD_DC_DCN)
@@ -5735,7 +5736,7 @@ create_stream_for_sink(struct amdgpu_dm_connector 
*aconnector,
 */
DRM_DEBUG_DRIVER("No preferred mode found\n");
} else {
-   recalculate_timing |= amdgpu_freesync_vid_mode &&
+   recalculate_timing = amdgpu_freesync_vid_mode &&
 is_freesync_video_mode(, aconnector);
if (recalculate_timing) {
freesync_mode = 
get_highest_refresh_rate_mode(aconnector, false);
@@ -5743,11 +5744,10 @@ create_stream_for_sink(struct amdgpu_dm_connector 
*aconnector,
mode = *freesync_mode;
} else {
decide_crtc_timing_for_drm_display_mode(
-				&mode, preferred_mode,
-   dm_state ? (dm_state->scaling != RMX_OFF) : 
false);
-   }
+				&mode, preferred_mode, scale);
  
-		preferred_refresh = drm_mode_vrefresh(preferred_mode);

+   preferred_refresh = drm_mode_vrefresh(preferred_mode);
+   }
}
  
  	if (recalculate_timing)

@@ -5759,7 +5759,7 @@ create_stream_for_sink(struct amdgpu_dm_connector 
*aconnector,
* If scaling is enabled and refresh rate didn't change
* we copy the vic and polarities of the old timings
*/
-   if (!recalculate_timing || mode_refresh != preferred_refresh)
+   if (!scale || mode_refresh != preferred_refresh)
fill_stream_properties_from_drm_display_mode(
			stream, &mode, &aconnector->base, con_state, NULL,
requested_bpc);



--
Regards,
Aurabindo Pillai
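
For readers of the fix above: the point of the patch is that scaling and timing recalculation are independent decisions. A minimal sketch of the flag split, with a hypothetical struct standing in for the driver state:

#include <stdbool.h>

struct stream_inputs {
	bool scaling_enabled;     /* dm_state->scaling != RMX_OFF */
	bool freesync_video_mode; /* amdgpu_freesync_vid_mode && is_freesync_video_mode() */
};

/* Before the fix, "scale" fed recalculate_timing, which skipped
 * decide_crtc_timing_for_drm_display_mode() whenever scaling was on.
 * After the fix the two flags are computed independently. */
static void compute_flags(const struct stream_inputs *in,
			  bool *scale, bool *recalculate_timing)
{
	*scale = in->scaling_enabled;
	*recalculate_timing = in->freesync_video_mode;
}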


[PATCH] drm/amdgpu/acpi: unify ATCS handling (v2)

2021-05-19 Thread Alex Deucher
Treat it like ATIF and check both the dGPU and APU for
the method.  This is required because ATCS may be hung
off of the APU in ACPI on A+A systems.

v2: add back accidentally removed ACPI handle check.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  17 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 124 ---
 2 files changed, 91 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 3147c1c935c8..b92eb068be12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -269,6 +269,7 @@ struct amdgpu_irq_src;
 struct amdgpu_fpriv;
 struct amdgpu_bo_va_mapping;
 struct amdgpu_atif;
+struct amdgpu_atcs;
 struct kfd_vm_fault_info;
 struct amdgpu_hive_info;
 struct amdgpu_reset_context;
@@ -685,20 +686,6 @@ struct amdgpu_vram_scratch {
u64 gpu_addr;
 };
 
-/*
- * ACPI
- */
-struct amdgpu_atcs_functions {
-   bool get_ext_state;
-   bool pcie_perf_req;
-   bool pcie_dev_rdy;
-   bool pcie_bus_width;
-};
-
-struct amdgpu_atcs {
-   struct amdgpu_atcs_functions functions;
-};
-
 /*
  * CGS
  */
@@ -829,7 +816,7 @@ struct amdgpu_device {
struct amdgpu_i2c_chan  *i2c_bus[AMDGPU_MAX_I2C_BUS];
struct debugfs_blob_wrapper debugfs_vbios_blob;
struct amdgpu_atif  *atif;
-   struct amdgpu_atcs  atcs;
+   struct amdgpu_atcs  *atcs;
	struct mutex		srbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
	struct mutex		grbm_idx_mutex;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index bf2939b6eb43..93f5207104ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -71,12 +71,25 @@ struct amdgpu_atif {
struct amdgpu_dm_backlight_caps backlight_caps;
 };
 
+struct amdgpu_atcs_functions {
+   bool get_ext_state;
+   bool pcie_perf_req;
+   bool pcie_dev_rdy;
+   bool pcie_bus_width;
+};
+
+struct amdgpu_atcs {
+   acpi_handle handle;
+
+   struct amdgpu_atcs_functions functions;
+};
+
 /* Call the ATIF method
  */
 /**
  * amdgpu_atif_call - call an ATIF method
  *
- * @atif: acpi handle
+ * @atif: atif structure
  * @function: the ATIF function to execute
  * @params: ATIF function params
  *
@@ -236,6 +249,35 @@ static acpi_handle amdgpu_atif_probe_handle(acpi_handle 
dhandle)
return handle;
 }
 
+static acpi_handle amdgpu_atcs_probe_handle(acpi_handle dhandle)
+{
+   acpi_handle handle = NULL;
+   char acpi_method_name[255] = { 0 };
+   struct acpi_buffer buffer = { sizeof(acpi_method_name), 
acpi_method_name };
+   acpi_status status;
+
+   /* For PX/HG systems, ATCS and ATPX are in the iGPU's namespace, on 
dGPU only
+* systems, ATIF is in the dGPU's namespace.
+*/
+	status = acpi_get_handle(dhandle, "ATCS", &handle);
+   if (ACPI_SUCCESS(status))
+   goto out;
+
+   if (amdgpu_has_atpx()) {
+   status = acpi_get_handle(amdgpu_atpx_get_dhandle(), "ATCS",
+					 &handle);
+   if (ACPI_SUCCESS(status))
+   goto out;
+   }
+
+   DRM_DEBUG_DRIVER("No ATCS handle found\n");
+   return NULL;
+out:
+	acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
+   DRM_DEBUG_DRIVER("Found ATCS handle %s\n", acpi_method_name);
+   return handle;
+}
+
 /**
  * amdgpu_atif_get_notification_params - determine notify configuration
  *
@@ -485,14 +527,15 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
 /**
  * amdgpu_atcs_call - call an ATCS method
  *
- * @handle: acpi handle
+ * @atcs: atcs structure
  * @function: the ATCS function to execute
  * @params: ATCS function params
  *
  * Executes the requested ATCS function (all asics).
  * Returns a pointer to the acpi output buffer.
  */
-static union acpi_object *amdgpu_atcs_call(acpi_handle handle, int function,
+static union acpi_object *amdgpu_atcs_call(struct amdgpu_atcs *atcs,
+  int function,
   struct acpi_buffer *params)
 {
acpi_status status;
@@ -516,7 +559,7 @@ static union acpi_object *amdgpu_atcs_call(acpi_handle 
handle, int function,
atcs_arg_elements[1].integer.value = 0;
}
 
-	status = acpi_evaluate_object(handle, "ATCS", &atcs_arg, &buffer);
+	status = acpi_evaluate_object(atcs->handle, "ATCS", &atcs_arg, &buffer);
 
/* Fail only if calling the method fails and ATIF is supported */
if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
@@ -550,7 +593,6 @@ static void amdgpu_atcs_parse_functions(struct 
amdgpu_atcs_functions *f, u32 mas
 /**
  * amdgpu_atcs_verify_interface - verify ATCS
  *
- * @handle: 
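
A standalone sketch of the probe order this patch introduces, with stub stand-ins for the ACPI helpers (the real code uses acpi_get_handle() and amdgpu_atpx_get_dhandle()):

#include <stdbool.h>
#include <stddef.h>

typedef void *handle_t; /* stand-in for acpi_handle */

/* Stubs: a real implementation would walk the ACPI namespace. */
static handle_t lookup(handle_t scope, const char *method)
{
	(void)scope; (void)method;
	return NULL;
}
static bool has_atpx(void) { return false; }
static handle_t atpx_scope(void) { return NULL; }

/* Try the dGPU's own namespace first; on PX/HG and A+A systems the
 * method may instead hang off the iGPU/APU, so fall back there. */
static handle_t probe_atcs(handle_t dgpu_scope)
{
	handle_t h = lookup(dgpu_scope, "ATCS");

	if (!h && has_atpx())
		h = lookup(atpx_scope(), "ATCS");

	return h; /* NULL if neither namespace exposes ATCS */
}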

RE: [PATCH] drm/amdgpu/vcn3: remove unused variable.

2021-05-19 Thread Zhang, Boyuan
[AMD Official Use Only]

Patch is
Reviewed-by: Boyuan Zhang 

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: May 19, 2021 4:38 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Koenig, Christian 

Subject: [PATCH] drm/amdgpu/vcn3: remove unused variable.

Not used so remove it.

Fixes: a8ccb542539ff1 ("drm/amdgpu: stop touching sched.ready in the backend")
Signed-off-by: Alex Deucher 
Cc: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 2d67caba9293..946335d0f19c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -381,7 +381,7 @@ static int vcn_v3_0_hw_fini(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
-   int i, j;
+   int i;

for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
--
2.31.1



[PATCH] drm/amdgpu/vcn3: remove unused variable.

2021-05-19 Thread Alex Deucher
Not used so remove it.

Fixes: a8ccb542539ff1 ("drm/amdgpu: stop touching sched.ready in the backend")
Signed-off-by: Alex Deucher 
Cc: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 2d67caba9293..946335d0f19c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -381,7 +381,7 @@ static int vcn_v3_0_hw_fini(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
-   int i, j;
+   int i;
 
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
-- 
2.31.1



[PATCH] drm/amdgpu/acpi: unify ATCS handling

2021-05-19 Thread Alex Deucher
Treat it like ATIF and check both the dGPU and APU for
the method.  This is required because ATCS may be hung
off of the APU in ACPI on A+A systems.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  17 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 126 ---
 2 files changed, 92 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 3147c1c935c8..b92eb068be12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -269,6 +269,7 @@ struct amdgpu_irq_src;
 struct amdgpu_fpriv;
 struct amdgpu_bo_va_mapping;
 struct amdgpu_atif;
+struct amdgpu_atcs;
 struct kfd_vm_fault_info;
 struct amdgpu_hive_info;
 struct amdgpu_reset_context;
@@ -685,20 +686,6 @@ struct amdgpu_vram_scratch {
u64 gpu_addr;
 };
 
-/*
- * ACPI
- */
-struct amdgpu_atcs_functions {
-   bool get_ext_state;
-   bool pcie_perf_req;
-   bool pcie_dev_rdy;
-   bool pcie_bus_width;
-};
-
-struct amdgpu_atcs {
-   struct amdgpu_atcs_functions functions;
-};
-
 /*
  * CGS
  */
@@ -829,7 +816,7 @@ struct amdgpu_device {
struct amdgpu_i2c_chan  *i2c_bus[AMDGPU_MAX_I2C_BUS];
struct debugfs_blob_wrapper debugfs_vbios_blob;
struct amdgpu_atif  *atif;
-   struct amdgpu_atcs  atcs;
+   struct amdgpu_atcs  *atcs;
	struct mutex		srbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
	struct mutex		grbm_idx_mutex;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index bf2939b6eb43..cc8bf2ac77d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -71,12 +71,25 @@ struct amdgpu_atif {
struct amdgpu_dm_backlight_caps backlight_caps;
 };
 
+struct amdgpu_atcs_functions {
+   bool get_ext_state;
+   bool pcie_perf_req;
+   bool pcie_dev_rdy;
+   bool pcie_bus_width;
+};
+
+struct amdgpu_atcs {
+   acpi_handle handle;
+
+   struct amdgpu_atcs_functions functions;
+};
+
 /* Call the ATIF method
  */
 /**
  * amdgpu_atif_call - call an ATIF method
  *
- * @atif: acpi handle
+ * @atif: atif structure
  * @function: the ATIF function to execute
  * @params: ATIF function params
  *
@@ -236,6 +249,35 @@ static acpi_handle amdgpu_atif_probe_handle(acpi_handle 
dhandle)
return handle;
 }
 
+static acpi_handle amdgpu_atcs_probe_handle(acpi_handle dhandle)
+{
+   acpi_handle handle = NULL;
+   char acpi_method_name[255] = { 0 };
+   struct acpi_buffer buffer = { sizeof(acpi_method_name), 
acpi_method_name };
+   acpi_status status;
+
+   /* For PX/HG systems, ATCS and ATPX are in the iGPU's namespace, on 
dGPU only
+* systems, ATIF is in the dGPU's namespace.
+*/
+	status = acpi_get_handle(dhandle, "ATCS", &handle);
+   if (ACPI_SUCCESS(status))
+   goto out;
+
+   if (amdgpu_has_atpx()) {
+   status = acpi_get_handle(amdgpu_atpx_get_dhandle(), "ATCS",
+					 &handle);
+   if (ACPI_SUCCESS(status))
+   goto out;
+   }
+
+   DRM_DEBUG_DRIVER("No ATCS handle found\n");
+   return NULL;
+out:
+	acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
+   DRM_DEBUG_DRIVER("Found ATCS handle %s\n", acpi_method_name);
+   return handle;
+}
+
 /**
  * amdgpu_atif_get_notification_params - determine notify configuration
  *
@@ -485,14 +527,15 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
 /**
  * amdgpu_atcs_call - call an ATCS method
  *
- * @handle: acpi handle
+ * @atcs: atcs structure
  * @function: the ATCS function to execute
  * @params: ATCS function params
  *
  * Executes the requested ATCS function (all asics).
  * Returns a pointer to the acpi output buffer.
  */
-static union acpi_object *amdgpu_atcs_call(acpi_handle handle, int function,
+static union acpi_object *amdgpu_atcs_call(struct amdgpu_atcs *atcs,
+  int function,
   struct acpi_buffer *params)
 {
acpi_status status;
@@ -516,7 +559,7 @@ static union acpi_object *amdgpu_atcs_call(acpi_handle 
handle, int function,
atcs_arg_elements[1].integer.value = 0;
}
 
-	status = acpi_evaluate_object(handle, "ATCS", &atcs_arg, &buffer);
+	status = acpi_evaluate_object(atcs->handle, "ATCS", &atcs_arg, &buffer);
 
/* Fail only if calling the method fails and ATIF is supported */
if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
@@ -550,7 +593,6 @@ static void amdgpu_atcs_parse_functions(struct 
amdgpu_atcs_functions *f, u32 mas
 /**
  * amdgpu_atcs_verify_interface - verify ATCS
  *
- * @handle: acpi handle
  * @atcs: amdgpu atcs struct
  *
  * 

[PATCH] drm/amd/display: Fix GPU scaling regression by FS video support

2021-05-19 Thread Nicholas Kazlauskas
[Why]
FS video support regressed GPU scaling and the scaled buffer ends up
stuck in the top left of the screen at native size - full, aspect,
center scaling modes do not function.

This is because decide_crtc_timing_for_drm_display_mode() does not
get called when scaling is enabled.

[How]
Split recalculate timing and scaling into two different flags.

We don't want to call drm_mode_set_crtcinfo() for scaling, but we
do want to call it for FS video.

Optimize and move preferred_refresh calculation next to
decide_crtc_timing_for_drm_display_mode() like it used to be since
that's not used for FS video.

We don't need to copy over the VIC or polarity in the case of FS video
modes because those don't change.

Fixes: a372f4abec ("drm/amd/display: Skip modeset for front porch change")

Cc: Aurabindo Pillai 
Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 8cd270f129..759621b0e8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5672,7 +5672,8 @@ create_stream_for_sink(struct amdgpu_dm_connector 
*aconnector,
struct drm_display_mode saved_mode;
struct drm_display_mode *freesync_mode = NULL;
bool native_mode_found = false;
-   bool recalculate_timing = dm_state ? (dm_state->scaling != RMX_OFF) : 
false;
+   bool recalculate_timing = false;
+   bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false;
int mode_refresh;
int preferred_refresh = 0;
 #if defined(CONFIG_DRM_AMD_DC_DCN)
@@ -5735,7 +5736,7 @@ create_stream_for_sink(struct amdgpu_dm_connector 
*aconnector,
 */
DRM_DEBUG_DRIVER("No preferred mode found\n");
} else {
-   recalculate_timing |= amdgpu_freesync_vid_mode &&
+   recalculate_timing = amdgpu_freesync_vid_mode &&
 is_freesync_video_mode(, aconnector);
if (recalculate_timing) {
freesync_mode = 
get_highest_refresh_rate_mode(aconnector, false);
@@ -5743,11 +5744,10 @@ create_stream_for_sink(struct amdgpu_dm_connector 
*aconnector,
mode = *freesync_mode;
} else {
decide_crtc_timing_for_drm_display_mode(
-				&mode, preferred_mode,
-   dm_state ? (dm_state->scaling != RMX_OFF) : 
false);
-   }
+				&mode, preferred_mode, scale);
 
-   preferred_refresh = drm_mode_vrefresh(preferred_mode);
+   preferred_refresh = drm_mode_vrefresh(preferred_mode);
+   }
}
 
if (recalculate_timing)
@@ -5759,7 +5759,7 @@ create_stream_for_sink(struct amdgpu_dm_connector 
*aconnector,
* If scaling is enabled and refresh rate didn't change
* we copy the vic and polarities of the old timings
*/
-   if (!recalculate_timing || mode_refresh != preferred_refresh)
+   if (!scale || mode_refresh != preferred_refresh)
fill_stream_properties_from_drm_display_mode(
			stream, &mode, &aconnector->base, con_state, NULL,
requested_bpc);
-- 
2.25.1



[PATCH 3/3] drm/amdgpu/pm: display vcn pp dpm (v3)

2021-05-19 Thread David M Nieto
Enable displaying DPM levels for VCN clocks
in swsmu supported ASICs

v2: removed set functions for navi, renoir
v3: removed set function from arcturus

Signed-off-by: David M Nieto 
Reviewed-by: Lijo Lazar 
---
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 46 +++
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  2 +
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   |  4 ++
 .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   | 34 ++
 .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c| 46 +++
 5 files changed, 132 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 77693bf0840c..1735a96dd307 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -822,6 +822,52 @@ static int arcturus_print_clk_levels(struct smu_context 
*smu,
now) ? "*" : ""));
break;
 
+   case SMU_VCLK:
+   ret = arcturus_get_current_clk_freq_by_table(smu, SMU_VCLK, 
&now);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get current vclk 
Failed!");
+   return ret;
+   }
+
+   single_dpm_table = &(dpm_context->dpm_tables.vclk_table);
+	ret = arcturus_get_clk_table(smu, &clocks, single_dpm_table);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get vclk levels 
Failed!");
+   return ret;
+   }
+
+   for (i = 0; i < single_dpm_table->count; i++)
+   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   i, single_dpm_table->dpm_levels[i].value,
+   (clocks.num_levels == 1) ? "*" :
+   (arcturus_freqs_in_same_level(
+   clocks.data[i].clocks_in_khz / 1000,
+   now) ? "*" : ""));
+   break;
+
+   case SMU_DCLK:
+   ret = arcturus_get_current_clk_freq_by_table(smu, SMU_DCLK, 
&now);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get current dclk 
Failed!");
+   return ret;
+   }
+
+   single_dpm_table = &(dpm_context->dpm_tables.dclk_table);
+	ret = arcturus_get_clk_table(smu, &clocks, single_dpm_table);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get dclk levels 
Failed!");
+   return ret;
+   }
+
+   for (i = 0; i < single_dpm_table->count; i++)
+   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   i, single_dpm_table->dpm_levels[i].value,
+   (clocks.num_levels == 1) ? "*" :
+   (arcturus_freqs_in_same_level(
+   clocks.data[i].clocks_in_khz / 1000,
+   now) ? "*" : ""));
+   break;
+
case SMU_PCIE:
gen_speed = smu_v11_0_get_current_pcie_link_speed_level(smu);
lane_width = smu_v11_0_get_current_pcie_link_width_level(smu);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 30f585afeddd..e81310a424e5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1273,6 +1273,8 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
case SMU_MCLK:
case SMU_UCLK:
case SMU_FCLK:
+   case SMU_VCLK:
+   case SMU_DCLK:
case SMU_DCEFCLK:
ret = navi10_get_current_clk_freq_by_table(smu, clk_type, 
&cur_value);
if (ret)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 0c40a54c46d7..b09c253b9db6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -987,6 +987,10 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
case SMU_MCLK:
case SMU_UCLK:
case SMU_FCLK:
+   case SMU_VCLK:
+   case SMU_VCLK1:
+   case SMU_DCLK:
+   case SMU_DCLK1:
case SMU_DCEFCLK:
ret = sienna_cichlid_get_current_clk_freq_by_table(smu, 
clk_type, &cur_value);
if (ret)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index f43b4c623685..0805dc439572 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -109,6 +109,8 @@ static struct cmn2asic_mapping 
renoir_clk_map[SMU_CLK_COUNT] = {
CLK_MAP(SOCCLK, CLOCK_SOCCLK),

[PATCH 2/3] drm/amdgpu/pm: add new fields for Navi1x (v3)

2021-05-19 Thread David M Nieto
Fill voltage fields in metrics table

v2: Removed dpm and freq ranges info
v3: Added check to ensure voltage offset is not zero

Signed-off-by: David M Nieto 
Reviewed-by: Lijo Lazar 
---
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 82 +++
 1 file changed, 65 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index ac13042672ea..30f585afeddd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -505,7 +505,7 @@ static int navi10_tables_init(struct smu_context *smu)
goto err0_out;
smu_table->metrics_time = 0;
 
-   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_1);
+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_3);
smu_table->gpu_metrics_table = 
kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
if (!smu_table->gpu_metrics_table)
goto err1_out;
@@ -2627,8 +2627,8 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,
 void **table)
 {
	struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetrics_legacy_t metrics;
int ret = 0;
 
@@ -2646,7 +2646,7 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,
 
	mutex_unlock(&smu->metrics_lock);
 
-   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);
+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
 
gpu_metrics->temperature_edge = metrics.TemperatureEdge;
gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -2681,17 +2681,29 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,
 
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
+   if (metrics.CurrGfxVoltageOffset)
+   gpu_metrics->voltage_gfx =
+   (155000 - 625 * metrics.CurrGfxVoltageOffset) / 100;
+   if (metrics.CurrMemVidOffset)
+   gpu_metrics->voltage_mem =
+   (155000 - 625 * metrics.CurrMemVidOffset) / 100;
+   if (metrics.CurrSocVoltageOffset)
+   gpu_metrics->voltage_soc =
+   (155000 - 625 * metrics.CurrSocVoltageOffset) / 100;
+
*table = (void *)gpu_metrics;
 
-   return sizeof(struct gpu_metrics_v1_1);
+   return sizeof(struct gpu_metrics_v1_3);
+out:
+   return ret;
 }
 
 static ssize_t navi10_get_gpu_metrics(struct smu_context *smu,
  void **table)
 {
	struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetrics_t metrics;
int ret = 0;
 
@@ -2709,7 +2721,7 @@ static ssize_t navi10_get_gpu_metrics(struct smu_context 
*smu,
 
	mutex_unlock(&smu->metrics_lock);
 
-   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);
+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
 
gpu_metrics->temperature_edge = metrics.TemperatureEdge;
gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -2746,17 +2758,29 @@ static ssize_t navi10_get_gpu_metrics(struct 
smu_context *smu,
 
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
+   if (metrics.CurrGfxVoltageOffset)
+   gpu_metrics->voltage_gfx =
+   (155000 - 625 * metrics.CurrGfxVoltageOffset) / 100;
+   if (metrics.CurrMemVidOffset)
+   gpu_metrics->voltage_mem =
+   (155000 - 625 * metrics.CurrMemVidOffset) / 100;
+   if (metrics.CurrSocVoltageOffset)
+   gpu_metrics->voltage_soc =
+   (155000 - 625 * metrics.CurrSocVoltageOffset) / 100;
+
*table = (void *)gpu_metrics;
 
-   return sizeof(struct gpu_metrics_v1_1);
+   return sizeof(struct gpu_metrics_v1_3);
+out:
+   return ret;
 }
 
 static ssize_t navi12_get_legacy_gpu_metrics(struct smu_context *smu,
 void **table)
 {
	struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetrics_NV12_legacy_t metrics;
int ret = 0;
 
@@ -2774,7 +2798,7 @@ static ssize_t navi12_get_legacy_gpu_metrics(struct 
smu_context 
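
To make the conversion above concrete, here is a self-contained illustration of the offset-to-millivolt formula together with the non-zero guard added in v3; without the guard, a zero offset from firmware would decode to a constant 1550 mV (1.55 V):

#include <stdint.h>
#include <stdio.h>

/* Decode an SMU voltage offset into millivolts, as in the patch:
 * mV = (155000 - 625 * offset) / 100. Returns 0 for a zero offset,
 * which the firmware reports when no valid telemetry is available. */
static uint16_t decode_voltage_mv(int offset)
{
	if (!offset)
		return 0;
	return (uint16_t)((155000 - 625 * offset) / 100);
}

int main(void)
{
	/* offset 0   -> guarded (would otherwise read as 1550 mV)
	 * offset 88  -> (155000 - 55000) / 100 = 1000 mV
	 * offset 248 -> (155000 - 155000) / 100 = 0 mV */
	printf("%u %u %u\n", decode_voltage_mv(0),
	       decode_voltage_mv(88), decode_voltage_mv(248));
	return 0;
}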

[PATCH 1/3] drm/amdgpu/pm: Update metrics table (v2)

2021-05-19 Thread David M Nieto
v2: removed static dpm and frequency ranges from table

expand metrics table with voltages and frequency ranges

Signed-off-by: David M Nieto 
Reviewed-by: Lijo Lazar 
---
 .../gpu/drm/amd/include/kgd_pp_interface.h| 69 +++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c|  3 +
 2 files changed, 72 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index e2d13131a432..b1cd52a9d684 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -536,6 +536,75 @@ struct gpu_metrics_v1_2 {
uint64_tfirmware_timestamp;
 };
 
+struct gpu_metrics_v1_3 {
+	struct metrics_table_header	common_header;
+
+	/* Temperature */
+	uint16_t			temperature_edge;
+	uint16_t			temperature_hotspot;
+	uint16_t			temperature_mem;
+	uint16_t			temperature_vrgfx;
+	uint16_t			temperature_vrsoc;
+	uint16_t			temperature_vrmem;
+
+	/* Utilization */
+	uint16_t			average_gfx_activity;
+	uint16_t			average_umc_activity; // memory controller
+	uint16_t			average_mm_activity; // UVD or VCN
+
+	/* Power/Energy */
+	uint16_t			average_socket_power;
+	uint64_t			energy_accumulator;
+
+	/* Driver attached timestamp (in ns) */
+	uint64_t			system_clock_counter;
+
+	/* Average clocks */
+	uint16_t			average_gfxclk_frequency;
+	uint16_t			average_socclk_frequency;
+	uint16_t			average_uclk_frequency;
+	uint16_t			average_vclk0_frequency;
+	uint16_t			average_dclk0_frequency;
+	uint16_t			average_vclk1_frequency;
+	uint16_t			average_dclk1_frequency;
+
+	/* Current clocks */
+	uint16_t			current_gfxclk;
+	uint16_t			current_socclk;
+	uint16_t			current_uclk;
+	uint16_t			current_vclk0;
+	uint16_t			current_dclk0;
+	uint16_t			current_vclk1;
+	uint16_t			current_dclk1;
+
+	/* Throttle status */
+	uint32_t			throttle_status;
+
+	/* Fans */
+	uint16_t			current_fan_speed;
+
+	/* Link width/speed */
+	uint16_t			pcie_link_width;
+	uint16_t			pcie_link_speed; // in 0.1 GT/s
+
+	uint16_t			padding;
+
+	uint32_t			gfx_activity_acc;
+	uint32_t			mem_activity_acc;
+
+	uint16_t			temperature_hbm[NUM_HBM_INSTANCES];
+
+	/* PMFW attached timestamp (10ns resolution) */
+	uint64_t			firmware_timestamp;
+
+	/* Voltage (mV) */
+	uint16_t			voltage_soc;
+	uint16_t			voltage_gfx;
+	uint16_t			voltage_mem;
+
+	uint16_t			padding1;
+};
+
 /*
  * gpu_metrics_v2_0 is not recommended as it's not naturally aligned.
  * Use gpu_metrics_v2_1 or later instead.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 0934e5b3aa17..0ceb7329838c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -764,6 +764,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t 
frev, uint8_t crev)
case METRICS_VERSION(1, 2):
structure_size = sizeof(struct gpu_metrics_v1_2);
break;
+   case METRICS_VERSION(1, 3):
+   structure_size = sizeof(struct gpu_metrics_v1_3);
+   break;
case METRICS_VERSION(2, 0):
structure_size = sizeof(struct gpu_metrics_v2_0);
break;
-- 
2.17.1

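
For consumers of the table, the common header is what selects the layout. A sketch of the version dispatch, mirroring the METRICS_VERSION() switch above (header field names as in kgd_pp_interface.h; an illustration, not the canonical parser):

#include <stdint.h>
#include <stdio.h>

/* Layout of the common header, per kgd_pp_interface.h. */
struct metrics_table_header {
	uint16_t structure_size;
	uint8_t  format_revision;
	uint8_t  content_revision;
};

/* Pick the structure to decode from the (format, content) pair. */
static const char *metrics_struct_name(const struct metrics_table_header *h)
{
	if (h->format_revision == 1 && h->content_revision == 3)
		return "gpu_metrics_v1_3"; /* added by this patch */
	if (h->format_revision == 1 && h->content_revision == 2)
		return "gpu_metrics_v1_2";
	return "unknown"; /* don't touch the payload */
}

int main(void)
{
	struct metrics_table_header h = { 0, 1, 3 };
	puts(metrics_struct_name(&h));
	return 0;
}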


Re: [PATCH 1/3] gpu: drm: replace occurrences of invalid character

2021-05-19 Thread Alex Deucher
Pushed out to drm-misc-next.  Also fixed up Michel's name.

Alex

On Wed, May 19, 2021 at 11:56 AM Randy Dunlap  wrote:
>
> On 5/19/21 1:15 AM, Mauro Carvalho Chehab wrote:
> > There are some places at drm that ended receiving a
> > REPLACEMENT CHARACTER U+fffd ('�'), probably because of
> > some bad charset conversion.
> >
> > Fix them by using what it seems to be the proper
> > character.
> >
> > Signed-off-by: Mauro Carvalho Chehab 
>
> Acked-by: Randy Dunlap 
>
> Thanks.
>
> > ---
> >  drivers/gpu/drm/amd/include/atombios.h   | 10 +-
> >  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  2 +-
> >  drivers/gpu/drm/i915/i915_gpu_error.h|  2 +-
> >  drivers/gpu/drm/r128/r128_drv.h  |  2 +-
> >  4 files changed, 8 insertions(+), 8 deletions(-)
> >
>
> --
> ~Randy
>


[PATCH 7/7] drm/amdgpu/jpeg3: add cancel_delayed_work_sync before power gate

2021-05-19 Thread James Zhu
Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index 94be353..bd77794 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -159,9 +159,9 @@ static int jpeg_v3_0_hw_init(void *handle)
 static int jpeg_v3_0_hw_fini(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   struct amdgpu_ring *ring;
 
-	ring = &adev->jpeg.inst->ring_dec;
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
  RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
-- 
2.7.4

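
The whole series applies one pattern; a compilable sketch of the race it closes, with stub stand-ins for the workqueue API (the real code cancels adev->vcn.idle_work):

/* Stand-ins for the kernel workqueue API; sketch only. */
struct delayed_work { int pending; };
static void cancel_delayed_work_sync(struct delayed_work *w) { w->pending = 0; }

static struct delayed_work idle_work; /* may re-enable power when it fires */

static void power_gate_hw(void) { /* final power-down */ }

/* Without the cancel, a queued idle_work handler can run during or
 * after the gating below and touch powered-down registers; the _sync
 * variant also waits for an already-running handler to finish. */
static int hw_fini_sketch(void)
{
	cancel_delayed_work_sync(&idle_work);
	power_gate_hw();
	return 0;
}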


[PATCH 4/7] drm/amdgpu/vcn3: add cancel_delayed_work_sync before power gate

2021-05-19 Thread James Zhu
Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 2d67cab..ce3c794 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -380,15 +380,14 @@ static int vcn_v3_0_hw_init(void *handle)
 static int vcn_v3_0_hw_fini(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   struct amdgpu_ring *ring;
-   int i, j;
+   int i;
+
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
 
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
 
-	ring = &adev->vcn.inst[i].ring_dec;
-
if (!amdgpu_sriov_vf(adev)) {
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != 
AMD_PG_STATE_GATE &&
-- 
2.7.4



[PATCH 2/7] drm/amdgpu/vcn2.0: add cancel_delayed_work_sync before power gate

2021-05-19 Thread James Zhu
Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 116b964..8af567c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -262,6 +262,8 @@ static int vcn_v2_0_hw_fini(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
  RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
-- 
2.7.4



[PATCH 1/7] drm/amdgpu/vcn1: add cancel_delayed_work_sync before power gate

2021-05-19 Thread James Zhu
Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 0c1beef..27b1ced 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -231,9 +231,13 @@ static int vcn_v1_0_hw_fini(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
-   RREG32_SOC15(VCN, 0, mmUVD_STATUS))
+   (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+RREG32_SOC15(VCN, 0, mmUVD_STATUS))) {
vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+   }
 
return 0;
 }
-- 
2.7.4



[PATCH 5/7] drm/amdgpu/jpeg2.0: add cancel_delayed_work_sync before power gate

2021-05-19 Thread James Zhu
Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index de5abce..85967a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -172,6 +172,8 @@ static int jpeg_v2_0_hw_fini(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
  RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
-- 
2.7.4



[PATCH 6/7] drm/amdgpu/jpeg2.5: add cancel_delayed_work_sync before power gate

2021-05-19 Thread James Zhu
Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 938ef4c..46096ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -187,14 +187,14 @@ static int jpeg_v2_5_hw_init(void *handle)
 static int jpeg_v2_5_hw_fini(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   struct amdgpu_ring *ring;
int i;
 
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
 
-	ring = &adev->jpeg.inst[i].ring_dec;
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
  RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
jpeg_v2_5_set_powergating_state(adev, 
AMD_PG_STATE_GATE);
-- 
2.7.4



[PATCH 3/7] drm/amdgpu/vcn2.5: add cancel_delayed_work_sync before power gate

2021-05-19 Thread James Zhu
Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 948813d..888b17d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -321,6 +321,8 @@ static int vcn_v2_5_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
 
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
-- 
2.7.4



Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID

2021-05-19 Thread Nieto, David M
[AMD Official Use Only]

For the case of virtualization, for example, the serial number has no relation
to the UUID, which means that at least for virtualization the node needs to be
created. This may also be the case on other GPUs.


From: Christian König 
Sent: Wednesday, May 19, 2021 3:58:35 AM
To: Nieto, David M ; Alex Deucher ; 
Gu, JiaWei (Will) 
Cc: Deng, Emily ; amd-gfx list 

Subject: Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID

Well, I don't think generating a UUID in the kernel makes sense in general.

What we can do is expose the serial number of the device, so that userspace
can create a UUID if necessary.

Christian.

On 18.05.21 at 22:37, Nieto, David M wrote:

[AMD Official Use Only]

I think the sysfs node should be moved into amdgpu_pm instead of the 
amdgpu_device.c and generation of the unique_id should be moved to 
navi10_ppt.c, similarly to other chips.

On further thought, generating a random UUID makes no sense at the driver
level; any application can do the same thing in userspace if the UUID sysfs
node is empty.

So, I think we should do the same as with the unique_id node: if the unique_id
is not present, just return.

David

From: Alex Deucher 
Sent: Tuesday, May 18, 2021 7:12 AM
To: Gu, JiaWei (Will) 
Cc: amd-gfx list 
; Deng, 
Emily ; Nieto, David M 

Subject: Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID

On Mon, May 17, 2021 at 1:54 AM Jiawei Gu 
 wrote:
>
> Introduce an RFC 4122 compliant UUID for the GPUs derived
> from the unique GPU serial number (from Vega10) on gpus.
> Where this serial number is not available, use a compliant
> random UUID.
>
> For virtualization, the unique ID is passed by the host driver
> in the PF2VF structure.
>
> Signed-off-by: Jiawei Gu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 36 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 96 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c|  4 +
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +-
>  drivers/gpu/drm/amd/amdgpu/nv.c |  5 ++
>  drivers/gpu/drm/amd/amdgpu/nv.h |  3 +
>  6 files changed, 146 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 3147c1c935c8..ad6d4b55be6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -802,6 +802,40 @@ struct amd_powerplay {
>   (rid == 0x01) || \
>   (rid == 0x10
>
> +union amdgpu_uuid_info {
> +   struct {
> +   union {
> +   struct {
> +   uint32_t did: 16;
> +   uint32_t fcn: 8;
> +   uint32_t asic_7 : 8;
> +   };
> +   uint32_t time_low;
> +   };
> +
> +   struct {
> +   uint32_t time_mid  : 16;
> +   uint32_t time_high : 12;
> +   uint32_t version   : 4;
> +   };
> +
> +   struct {
> +   struct {
> +   uint8_t clk_seq_hi : 6;
> +   uint8_t variant: 2;
> +   };
> +   union {
> +   uint8_t clk_seq_low;
> +   uint8_t asic_6;
> +   };
> +   uint16_t asic_4;
> +   };
> +
> +   uint32_t asic_0;
> +   };
> +   char as_char[16];
> +};
> +
>  #define AMDGPU_RESET_MAGIC_NUM 64
>  #define AMDGPU_MAX_DF_PERFMONS 4
>  struct amdgpu_device {
> @@ -1074,6 +1108,8 @@ struct amdgpu_device {
> 	char			product_name[32];
> 	char			serial[20];
>
> +   union amdgpu_uuid_info uuid_info;
> +
> struct amdgpu_autodump  autodump;
>
> 	atomic_t		throttling_logging_enabled;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 7c6c435e5d02..079841e1cb52 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -37,6 +37,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "amdgpu.h"
>  #include "amdgpu_trace.h"
>  #include "amdgpu_i2c.h"
> @@ -3239,11 +3240,104 @@ static int 
> amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
> return ret;
>  }
>
> +static bool amdgpu_is_uuid_info_empty(union amdgpu_uuid_info *uuid_info)
> +{
> + 
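
Whatever layout is chosen, RFC 4122 compliance itself comes down to two small fields; a minimal sketch of stamping the 4-bit version and 2-bit variant onto a 16-byte buffer (independent of the bitfield union debated above):

#include <stdint.h>

/* Stamp RFC 4122 version/variant bits onto a 16-byte UUID buffer.
 * Byte 6 high nibble carries the version; byte 8 top two bits carry
 * the variant (binary 10). Everything else is left untouched. */
static void uuid_set_version_variant(uint8_t uuid[16], uint8_t version)
{
	uuid[6] = (uint8_t)((uuid[6] & 0x0f) | (version << 4));
	uuid[8] = (uint8_t)((uuid[8] & 0x3f) | 0x80);
}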

Re: [PATCH 1/3] gpu: drm: replace occurrences of invalid character

2021-05-19 Thread Randy Dunlap
On 5/19/21 1:15 AM, Mauro Carvalho Chehab wrote:
> There are some places at drm that ended receiving a
> REPLACEMENT CHARACTER U+fffd ('�'), probably because of
> some bad charset conversion.
> 
> Fix them by using what it seems to be the proper
> character.
> 
> Signed-off-by: Mauro Carvalho Chehab 

Acked-by: Randy Dunlap 

Thanks.

> ---
>  drivers/gpu/drm/amd/include/atombios.h   | 10 +-
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.h|  2 +-
>  drivers/gpu/drm/r128/r128_drv.h  |  2 +-
>  4 files changed, 8 insertions(+), 8 deletions(-)
> 

-- 
~Randy



Re: [PATCH] drm/amdgpu: Disable cwsr for vega10 and Sienna_Cichlid in sriov

2021-05-19 Thread Felix Kuehling

On 2021-05-19 at 5:02 a.m., Chengzhe Liu wrote:
> In sriov, cwsr is not stable
NAK. Without CWSR, ROCm is not stable. Any compute application with long
running waves can cause a hang.

Regards,
  Felix

>
> Signed-off-by: Chengzhe Liu 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 24 +---
>  1 file changed, 21 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 80015e866498..89bd0059329b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -335,7 +335,7 @@ static const struct kfd_device_info vega10_vf_device_info 
> = {
>   .event_interrupt_class = _interrupt_class_v9,
>   .num_of_watch_points = 4,
>   .mqd_size_aligned = MQD_SIZE_ALIGNED,
> - .supports_cwsr = true,
> + .supports_cwsr = false,
>   .needs_iommu_device = false,
>   .needs_pci_atomics = false,
>   .num_sdma_engines = 2,
> @@ -505,6 +505,24 @@ static const struct kfd_device_info 
> sienna_cichlid_device_info = {
>   .num_sdma_queues_per_engine = 8,
>  };
>  
> +static const struct kfd_device_info sienna_cichlid_vf_device_info = {
> + .asic_family = CHIP_SIENNA_CICHLID,
> + .asic_name = "sienna_cichlid",
> + .max_pasid_bits = 16,
> + .max_no_of_hqd  = 24,
> + .doorbell_size  = 8,
> + .ih_ring_entry_size = 8 * sizeof(uint32_t),
> + .event_interrupt_class = _interrupt_class_v10,
> + .num_of_watch_points = 4,
> + .mqd_size_aligned = MQD_SIZE_ALIGNED,
> + .needs_iommu_device = false,
> + .supports_cwsr = false,
> + .needs_pci_atomics = true,
> + .num_sdma_engines = 4,
> + .num_xgmi_sdma_engines = 0,
> + .num_sdma_queues_per_engine = 8,
> +};
> +
>  static const struct kfd_device_info navy_flounder_device_info = {
>   .asic_family = CHIP_NAVY_FLOUNDER,
>   .asic_name = "navy_flounder",
> @@ -601,7 +619,7 @@ static const struct kfd_device_info 
> *kfd_supported_devices[][2] = {
>   [CHIP_NAVI10] = {_device_info, NULL},
>   [CHIP_NAVI12] = {_device_info, _device_info},
>   [CHIP_NAVI14] = {_device_info, NULL},
> - [CHIP_SIENNA_CICHLID] = {_cichlid_device_info, 
> _cichlid_device_info},
> + [CHIP_SIENNA_CICHLID] = {_cichlid_device_info, 
> _cichlid_vf_device_info},
>   [CHIP_NAVY_FLOUNDER] = {_flounder_device_info, 
> _flounder_device_info},
>   [CHIP_VANGOGH] = {_device_info, NULL},
>   [CHIP_DIMGREY_CAVEFISH] = {_cavefish_device_info, 
> _cavefish_device_info},
> @@ -674,7 +692,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>  
>  static void kfd_cwsr_init(struct kfd_dev *kfd)
>  {
> - if (cwsr_enable && kfd->device_info->supports_cwsr) {
> + if ((cwsr_enable && kfd->device_info->supports_cwsr) || cwsr_enable == 
> 2) {
>   if (kfd->device_info->asic_family < CHIP_VEGA10) {
>   BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
>   kfd->cwsr_isa = cwsr_trap_gfx8_hex;
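
For context on the kfd_cwsr_init() hunk quoted above: cwsr_enable acts as a tri-state module parameter. A minimal sketch of the gating condition after the patch (names are illustrative):

#include <stdbool.h>

static int cwsr_enable = 1; /* module parameter: 0 = off, 1 = on if supported, 2 = force */

struct device_caps { bool supports_cwsr; };

/* Mirrors the condition in the quoted hunk: honor the per-ASIC
 * capability for 1, bypass it entirely for 2. */
static bool should_init_cwsr(const struct device_caps *caps)
{
	return (cwsr_enable && caps->supports_cwsr) || cwsr_enable == 2;
}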


Re: [PATCH 3/3] drm/amdgpu/pm: display vcn pp dpm

2021-05-19 Thread Lijo Lazar



Avoid changes to sienna_cichlid_force_clk_levels as well.
With that addressed, the patch is

Reviewed-by: Lijo Lazar 

On 5/19/2021 11:32 AM, David M Nieto wrote:

Enable displaying DPM levels for VCN clocks
in swsmu supported ASICs

Signed-off-by: David M Nieto 
---
  .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 46 +++
  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  2 +
  .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   |  8 
  .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   | 34 ++
  .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c| 46 +++
  5 files changed, 136 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 77693bf0840c..1735a96dd307 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -822,6 +822,52 @@ static int arcturus_print_clk_levels(struct smu_context 
*smu,
now) ? "*" : ""));
break;
  
+	case SMU_VCLK:

+   ret = arcturus_get_current_clk_freq_by_table(smu, SMU_VCLK, 
&now);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get current vclk 
Failed!");
+   return ret;
+   }
+
+   single_dpm_table = &(dpm_context->dpm_tables.vclk_table);
+	ret = arcturus_get_clk_table(smu, &clocks, single_dpm_table);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get vclk levels 
Failed!");
+   return ret;
+   }
+
+   for (i = 0; i < single_dpm_table->count; i++)
+   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   i, single_dpm_table->dpm_levels[i].value,
+   (clocks.num_levels == 1) ? "*" :
+   (arcturus_freqs_in_same_level(
+   clocks.data[i].clocks_in_khz / 1000,
+   now) ? "*" : ""));
+   break;
+
+   case SMU_DCLK:
+   ret = arcturus_get_current_clk_freq_by_table(smu, SMU_DCLK, 
&now);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get current dclk 
Failed!");
+   return ret;
+   }
+
+   single_dpm_table = &(dpm_context->dpm_tables.dclk_table);
+	ret = arcturus_get_clk_table(smu, &clocks, single_dpm_table);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get dclk levels 
Failed!");
+   return ret;
+   }
+
+   for (i = 0; i < single_dpm_table->count; i++)
+   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   i, single_dpm_table->dpm_levels[i].value,
+   (clocks.num_levels == 1) ? "*" :
+   (arcturus_freqs_in_same_level(
+   clocks.data[i].clocks_in_khz / 1000,
+   now) ? "*" : ""));
+   break;
+
case SMU_PCIE:
gen_speed = smu_v11_0_get_current_pcie_link_speed_level(smu);
lane_width = smu_v11_0_get_current_pcie_link_width_level(smu);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index b8971303a873..7763de464678 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1273,6 +1273,8 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
case SMU_MCLK:
case SMU_UCLK:
case SMU_FCLK:
+   case SMU_VCLK:
+   case SMU_DCLK:
case SMU_DCEFCLK:
ret = navi10_get_current_clk_freq_by_table(smu, clk_type, 
&cur_value);
if (ret)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 0c40a54c46d7..6da6d08d8858 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -987,6 +987,10 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
case SMU_MCLK:
case SMU_UCLK:
case SMU_FCLK:
+   case SMU_VCLK:
+   case SMU_VCLK1:
+   case SMU_DCLK:
+   case SMU_DCLK1:
case SMU_DCEFCLK:
ret = sienna_cichlid_get_current_clk_freq_by_table(smu, clk_type, 
&cur_value);
if (ret)
@@ -1150,6 +1154,10 @@ static int sienna_cichlid_force_clk_levels(struct 
smu_context *smu,
case SMU_MCLK:
case SMU_UCLK:
case SMU_FCLK:
+   case SMU_VCLK:
+   case SMU_VCLK1:
+   case SMU_DCLK:
+   case SMU_DCLK1:
/* There is only 2 levels for fine grained DPM */
if 

Re: [PATCH 2/3] drm/amdgpu/pm: add new fields for Navi1x

2021-05-19 Thread Lijo Lazar
Add a check for non-zero offsets so that it doesn't show a static voltage
of 1.55 V all the time. With that addressed, the patch is


Reviewed-by: Lijo Lazar 

On 5/19/2021 11:32 AM, David M Nieto wrote:

Fill voltage fields in metrics table

Signed-off-by: David M Nieto 
---
  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 58 +--
  1 file changed, 41 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index ac13042672ea..b8971303a873 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -505,7 +505,7 @@ static int navi10_tables_init(struct smu_context *smu)
goto err0_out;
smu_table->metrics_time = 0;
  
-	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_1);

+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_3);
smu_table->gpu_metrics_table = 
kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
if (!smu_table->gpu_metrics_table)
goto err1_out;
@@ -2627,8 +2627,8 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,
 void **table)
  {
	struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetrics_legacy_t metrics;
int ret = 0;
  
@@ -2646,7 +2646,7 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct smu_context *smu,
  
	mutex_unlock(&smu->metrics_lock);
  
-	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);

+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
  
  	gpu_metrics->temperature_edge = metrics.TemperatureEdge;

gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -2681,17 +2681,23 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,
  
  	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
  
+	gpu_metrics->voltage_gfx = (155000 - 625 * metrics.CurrGfxVoltageOffset) / 100;

+   gpu_metrics->voltage_mem = (155000 - 625 * metrics.CurrMemVidOffset) / 
100;
+   gpu_metrics->voltage_soc = (155000 - 625 * 
metrics.CurrSocVoltageOffset) / 100;
+
*table = (void *)gpu_metrics;
  
-	return sizeof(struct gpu_metrics_v1_1);

+   return sizeof(struct gpu_metrics_v1_3);
+out:
+   return ret;
  }
  
  static ssize_t navi10_get_gpu_metrics(struct smu_context *smu,

  void **table)
  {
	struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetrics_t metrics;
int ret = 0;
  
@@ -2709,7 +2715,7 @@ static ssize_t navi10_get_gpu_metrics(struct smu_context *smu,
  
	mutex_unlock(&smu->metrics_lock);
  
-	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);

+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
  
  	gpu_metrics->temperature_edge = metrics.TemperatureEdge;

gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -2746,17 +2752,23 @@ static ssize_t navi10_get_gpu_metrics(struct 
smu_context *smu,
  
  	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
  
+	gpu_metrics->voltage_gfx = (155000 - 625 * metrics.CurrGfxVoltageOffset) / 100;

+   gpu_metrics->voltage_mem = (155000 - 625 * metrics.CurrMemVidOffset) / 
100;
+   gpu_metrics->voltage_soc = (155000 - 625 * 
metrics.CurrSocVoltageOffset) / 100;
+
*table = (void *)gpu_metrics;
  
-	return sizeof(struct gpu_metrics_v1_1);

+   return sizeof(struct gpu_metrics_v1_3);
+out:
+   return ret;
  }
  
  static ssize_t navi12_get_legacy_gpu_metrics(struct smu_context *smu,

 void **table)
  {
	struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetrics_NV12_legacy_t metrics;
int ret = 0;
  
@@ -2774,7 +2786,7 @@ static ssize_t navi12_get_legacy_gpu_metrics(struct smu_context *smu,
  
	mutex_unlock(&smu->metrics_lock);
  
-	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);

+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
  
  	gpu_metrics->temperature_edge = metrics.TemperatureEdge;

gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -2814,17 +2826,23 @@ static ssize_t navi12_get_legacy_gpu_metrics(struct 
smu_context 

Re: [PATCH 1/3] drm/amdgpu/pm: Update metrics table

2021-05-19 Thread Lijo Lazar

Reviewed-by: Lijo Lazar 


On 5/19/2021 11:32 AM, David M Nieto wrote:

expand metrics table with voltages and frequency ranges

Signed-off-by: David M Nieto 
---
  .../gpu/drm/amd/include/kgd_pp_interface.h| 69 +++
  drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c|  3 +
  2 files changed, 72 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index e2d13131a432..b1cd52a9d684 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -536,6 +536,75 @@ struct gpu_metrics_v1_2 {
uint64_tfirmware_timestamp;
  };
  
+struct gpu_metrics_v1_3 {
+	struct metrics_table_header	common_header;
+
+	/* Temperature */
+	uint16_t			temperature_edge;
+	uint16_t			temperature_hotspot;
+	uint16_t			temperature_mem;
+	uint16_t			temperature_vrgfx;
+	uint16_t			temperature_vrsoc;
+	uint16_t			temperature_vrmem;
+
+	/* Utilization */
+	uint16_t			average_gfx_activity;
+	uint16_t			average_umc_activity; // memory controller
+	uint16_t			average_mm_activity; // UVD or VCN
+
+	/* Power/Energy */
+	uint16_t			average_socket_power;
+	uint64_t			energy_accumulator;
+
+	/* Driver attached timestamp (in ns) */
+	uint64_t			system_clock_counter;
+
+	/* Average clocks */
+	uint16_t			average_gfxclk_frequency;
+	uint16_t			average_socclk_frequency;
+	uint16_t			average_uclk_frequency;
+	uint16_t			average_vclk0_frequency;
+	uint16_t			average_dclk0_frequency;
+	uint16_t			average_vclk1_frequency;
+	uint16_t			average_dclk1_frequency;
+
+	/* Current clocks */
+	uint16_t			current_gfxclk;
+	uint16_t			current_socclk;
+	uint16_t			current_uclk;
+	uint16_t			current_vclk0;
+	uint16_t			current_dclk0;
+	uint16_t			current_vclk1;
+	uint16_t			current_dclk1;
+
+	/* Throttle status */
+	uint32_t			throttle_status;
+
+	/* Fans */
+	uint16_t			current_fan_speed;
+
+	/* Link width/speed */
+	uint16_t			pcie_link_width;
+	uint16_t			pcie_link_speed; // in 0.1 GT/s
+
+	uint16_t			padding;
+
+	uint32_t			gfx_activity_acc;
+	uint32_t			mem_activity_acc;
+
+	uint16_t			temperature_hbm[NUM_HBM_INSTANCES];
+
+	/* PMFW attached timestamp (10ns resolution) */
+	uint64_t			firmware_timestamp;
+
+	/* Voltage (mV) */
+	uint16_t			voltage_soc;
+	uint16_t			voltage_gfx;
+	uint16_t			voltage_mem;
+
+	uint16_t			padding1;
+};
+
  /*
   * gpu_metrics_v2_0 is not recommended as it's not naturally aligned.
   * Use gpu_metrics_v2_1 or later instead.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 0934e5b3aa17..0ceb7329838c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -764,6 +764,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev)
case METRICS_VERSION(1, 2):
structure_size = sizeof(struct gpu_metrics_v1_2);
break;
+   case METRICS_VERSION(1, 3):
+   structure_size = sizeof(struct gpu_metrics_v1_3);
+   break;
case METRICS_VERSION(2, 0):
structure_size = sizeof(struct gpu_metrics_v2_0);
break;
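
For reference, here is a minimal user-space sketch of how a consumer might dispatch on the header that smu_cmn_init_soft_gpu_metrics() fills in. The header layout is assumed from kgd_pp_interface.h; reading the blob from sysfs and all surrounding names are hypothetical:

#include <stddef.h>
#include <stdint.h>

/* Assumed layout of the common header from kgd_pp_interface.h. */
struct metrics_table_header {
	uint16_t structure_size;
	uint8_t  format_revision;	/* frev, e.g. 1 */
	uint8_t  content_revision;	/* crev, e.g. 3 */
};

/* buf points at a gpu_metrics blob, e.g. read from the gpu_metrics
 * sysfs file (the path is an assumption, not part of this patch). */
static int metrics_is_v1_3(const void *buf, size_t len)
{
	const struct metrics_table_header *h = buf;

	return len >= sizeof(*h) &&
	       h->structure_size <= len &&
	       h->format_revision == 1 &&
	       h->content_revision == 3;
}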



--
Thanks,
Lijo


Re: [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs

2021-05-19 Thread Felix Kuehling
On 2021-05-19 at 6:04 a.m., Christian König wrote:
> On 19.05.21 at 07:45, Felix Kuehling wrote:
>> SG BOs such as dmabuf imports and userptr BOs do not consume system
>> resources directly. Instead they point to resources owned elsewhere.
>> They typically get evicted by DMABuf move notifiers or MMU notifiers.
>> If those notifiers don't need to wait for hardware fences (i.e. the SG
>> BOs are used in a preemptible context), then we don't need to limit
>> them to the GTT size and we don't need TTM to evict them.
>>
>> Create a new domain for such preemptible SG BOs that does not impose
>> artificial size limits and TTM evictions.
>
> Please don't create a GEM domain for this. This has just too much
> potential to be abused by userspace.
>
> The kernel is the only place where we can decide if the BO is
> preemptible or not.

I did put a check in amdgpu_gem_create_ioctl to prevent user mode from
directly creating preemptible BOs.

Instead of a domain I can use a flag in the BO. But if I put it in a
flag (say AMDGPU_GEM_CREATE_PREEMPTIBLE), that's also accessible to user
mode and I need to filter it out in the ioctl API. I don't see how
that's any different.

Any other ideas?

Thanks,
  Felix
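
(Whichever representation wins, the ioctl-level filtering looks about the same. A sketch only: the domain check is quoted from this series, while AMDGPU_GEM_CREATE_PREEMPTIBLE is the hypothetical flag name from the discussion above.)

	/* Sketch: reject user-supplied preemptible requests at the ioctl
	 * boundary, whether expressed as a domain bit (as in this series)
	 * or as a hypothetical creation flag. */
	if (args->in.domains & AMDGPU_GEM_DOMAIN_PREEMPT)
		return -EINVAL;	/* preemptible domain not supported by current CS API */
	if (flags & AMDGPU_GEM_CREATE_PREEMPTIBLE)	/* hypothetical flag */
		return -EINVAL;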


>
> Christian.
>
>>
>> Signed-off-by: Felix Kuehling 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Makefile   |   7 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |   4 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   8 +
>>   .../gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c   | 190 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  37 +++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  11 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |   3 +-
>>   include/uapi/drm/amdgpu_drm.h |   7 +-
>>   8 files changed, 258 insertions(+), 9 deletions(-)
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 6331a11299d0..6cf0fe871d6c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -51,9 +51,10 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>   atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
>>   amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
>>   amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
>> -    amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
>> amdgpu_atomfirmware.o \
>> -    amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
>> -    amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o
>> amdgpu_ras.o amdgpu_vm_cpu.o \
>> +    amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o
>> amdgpu_virt.o \
>> +    amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
>> +    amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \
>> +    amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
>>   amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o
>> amdgpu_nbio.o \
>>   amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>>   amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 311bcdc59eda..280cc0c0a9b3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -246,6 +246,10 @@ int amdgpu_gem_create_ioctl(struct drm_device
>> *dev, void *data,
>>   if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
>>   return -EINVAL;
>>   +    /* preemptible domain not supported by current CS API */
>> +    if (args->in.domains & AMDGPU_GEM_DOMAIN_PREEMPT)
>> +    return -EINVAL;
>> +
>>   if (!amdgpu_is_tmz(adev) && (flags &
>> AMDGPU_GEM_CREATE_ENCRYPTED)) {
>>   DRM_NOTE_ONCE("Cannot allocate secure buffer since TMZ is
>> disabled\n");
>>   return -EINVAL;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 745fcf3ea450..5b538e746afa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -194,6 +194,14 @@ void amdgpu_bo_placement_from_domain(struct
>> amdgpu_bo *abo, u32 domain)
>>   c++;
>>   }
>>   +    if (domain & AMDGPU_GEM_DOMAIN_PREEMPT) {
>> +    places[c].fpfn = 0;
>> +    places[c].lpfn = 0;
>> +    places[c].mem_type = AMDGPU_PL_PREEMPT;
>> +    places[c].flags = 0;
>> +    c++;
>> +    }
>> +
>>   if (!c) {
>>   places[c].fpfn = 0;
>>   places[c].lpfn = 0;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>> new file mode 100644
>> index ..b4185dc3c394
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>> @@ -0,0 +1,190 @@
>> +/*
>> + * Copyright 2016-2021 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby 

Re: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to swapout and swapin

2021-05-19 Thread Felix Kuehling
Looks like we're creating the userptr BO as ttm_bo_type_device. I guess
we should be using ttm_bo_type_sg? BTW, amdgpu_gem_userptr_ioctl also
uses ttm_bo_type_device.

Regards,
  Felix


On 2021-05-19 at 6:01 a.m., Christian König wrote:
> I'm scratching my head how that is even possible.
>
> See when a BO is created in the system domain it is just an empty
> hull, e.g. without backing store and allocated pages.
>
> So the swapout function will just ignore it.
>
> Christian.
>
> On 19.05.21 at 07:07, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> I have reverted Chris' patch and still hit this failure.
>> Just look at two lines in Chris' patch. Any BO in the CPU domain would be
>> swapped out first. That is why we hit this issue frequently now. But the
>> bug has been there for a long time.
>>
>> -   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> -   list_for_each_entry(bo, >swap_lru[i], swap) {
>> [snip]
>> +   for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>> +   for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
>>
>>
>> 
>> From: Pan, Xinhui 
>> Sent: May 19, 2021 12:09
>> To: Kuehling, Felix; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander; Koenig, Christian;
>> dri-de...@lists.freedesktop.org; dan...@ffwll.ch
>> Subject: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to
>> swapout and swapin
>>
>> Yes, we really don't swap out SG BOs.
>> The problem is that before we validate a userptr BO, we create this
>> BO in the CPU domain by default. So this BO has a chance to be swapped out.
>>
>> We set the TTM_PAGE_FLAG_SG flag on the userptr BO in populate(), which is too
>> late.
>> I have not tried to revert Chris' patch as I think it doesn't help. Or I
>> can have a try later.
>>
>> 
>> From: Kuehling, Felix 
>> Sent: May 19, 2021 11:29
>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander; Koenig, Christian;
>> dri-de...@lists.freedesktop.org; dan...@ffwll.ch
>> Subject: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to
>> swapout and swapin
>>
>> Swapping SG BOs makes no sense, because TTM doesn't own the pages of
>> this type of BO.
>>
>> Last I checked, userptr BOs (and other SG BOs) were protected from
>> swapout by the fact that they would not be added to the swap-LRU. But it
>> looks like Christian just removed the swap-LRU. I guess this broke that
>> protection:
>>
>> commit 2cb51d22d70b18eaf339abf9758bf0b7608da65c
>> Author: Christian König 
>> Date:   Tue Oct 6 16:30:09 2020 +0200
>>
>>   drm/ttm: remove swap LRU v3
>>
>>   Instead evict round robin from each devices SYSTEM and TT domain.
>>
>>   v2: reorder num_pages access reported by Dan's script
>>   v3: fix rebase fallout, num_pages should be 32bit
>>
>>   Signed-off-by: Christian König 
>>   Tested-by: Nirmoy Das 
>>   Reviewed-by: Huang Rui 
>>   Reviewed-by: Matthew Auld 
>>   Link: https://patchwork.freedesktop.org/patch/424009/
>>
>> Regards,
>>     Felix
>>
>>
>> On 2021-05-18 10:28 p.m., xinhui pan wrote:
>>> cpu 1                                  cpu 2
>>> kfd alloc BO A(userptr)                alloc BO B(GTT)
>>>   -> init -> validate                    -> init -> validate -> populate
>>>   init_user_pages                        -> swapout BO A // hit ttm pages limit
>>>     -> get_user_pages (fill up ttm->pages)
>>>     -> validate -> populate
>>>     -> swapin BO A // Now hit the BUG
>>>
>>> We know that get_user_pages may race with swapout on the same BO.
>>> There are some issues I have hit.
>>> 1) memory corruption.
>>> This is because we do a swap before memory is set up. ttm_tt_swapout()
>>> just creates a swap_storage with its content being 0x0. So we set up
>>> memory after the swapout, and the following swapin leaves the memory
>>> corrupted.
>>>
>>> 2) panic
>>> When swapout happens concurrently with get_user_pages, they touch ttm->pages without
>>> any lock. It causes memory corruption too. But I mostly hit a page fault.
>>>
>>> Signed-off-by: xinhui pan 
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16
>>> +++-
>>>    1 file changed, 15 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index 928e8d57cd08..42460e4480f8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -835,6 +835,7 @@ static int init_user_pages(struct kgd_mem *mem,
>>> uint64_t user_addr)
>>>    struct amdkfd_process_info *process_info = mem->process_info;
>>>    struct amdgpu_bo *bo = mem->bo;
>>>    struct ttm_operation_ctx ctx = { true, false };
>>> + struct page **pages;
>>>    int ret = 0;
>>>
>>>    mutex_lock(_info->lock);
>>> @@ -852,7 +853,13 @@ static int init_user_pages(struct kgd_mem *mem,
>>> uint64_t user_addr)
>>>   
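
The quoted patch truncates here. As a sketch of the direction suggested above, creating the userptr BO as an SG BO from the start so TTM never tries to swap its pages, assuming the 5.12-era amdgpu_gem_object_create() signature:

	/* Sketch only: pass ttm_bo_type_sg instead of ttm_bo_type_device for
	 * userptr BOs, so TTM treats the pages as externally owned and keeps
	 * the BO off the swap path from creation onwards. */
	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
				     0, ttm_bo_type_sg, NULL, &gobj);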

Re: Re: [PATCH] drm/amdgpu: Increase tlb flush timeout for sriov

2021-05-19 Thread Christian König

Hi Horace,

that is correct, but also completely irrelevant.

What we do here is to wait for the TLB flush to avoid starting 
operations with invalid cache data.


But a parallel FLR clears the cache anyway and also prevents any new 
operation from starting, so it is perfectly valid to timeout and just 
continue with an error message.



On the other hand waiting for 6 seconds in a busy loop will most likely 
trigger the watchdog timer and potentially kill our process.


That is a rather clear no-go, we simply can't increase timeouts infinitely.

Regards,
Christian.

On 19.05.21 at 16:39, Chen, Horace wrote:


[AMD Official Use Only]


Hi Christian,

I think the problem is that a non-FLRed VF will not know that another
VF got an FLR, unless the host triggered a whole GPU reset.
So in the worst situation, for example if VF0 to VF10 all hang
and get FLRed one by one, VF11 will not know that any
FLRs happened; from VF11's perspective, it just sees that the fence didn't
come back for about 5.5 (0.5 * 11) seconds.


Thanks & Regards,
Horace.


*From:* Koenig, Christian 
*Sent:* May 19, 2021 19:49
*To:* Liu, Cheng Zhe ; Christian König 
; amd-gfx@lists.freedesktop.org 

*Cc:* Xiao, Jack ; Xu, Feifei ; 
Wang, Kevin(Yang) ; Tuikov, Luben 
; Deucher, Alexander 
; Zhang, Hawking ; 
Chen, Horace 

*Subject:* Re: [PATCH] drm/amdgpu: Increase tlb flush timeout for sriov
Yeah, but you can't do that, it will probably trigger the watchdog timer.

The usec_timeout is named this way because it is a usec timeout.
Anything larger than 1ms is a no-go here.

When the other instances do a FLR we don't really need to wait for the
TLB flush anyway since any FLR will kill that.

Christian.

On 19.05.21 at 13:08, Liu, Cheng Zhe wrote:
> [AMD Official Use Only]
>
> We support 12 VFs at most. In the worst case, the first 11 all fail IDLE
> and do an FLR, so it will need 11 * 500ms to switch to the 12th VF;
> that is why I set 12 * 500ms for the timeout.
>
> -Original Message-
> From: Christian König 
> Sent: Wednesday, May 19, 2021 6:08 PM
> To: Liu, Cheng Zhe ; amd-gfx@lists.freedesktop.org
> Cc: Xiao, Jack ; Xu, Feifei ; 
Wang, Kevin(Yang) ; Tuikov, Luben 
; Deucher, Alexander 
; Koenig, Christian 
; Zhang, Hawking 

> Subject: Re: [PATCH] drm/amdgpu: Increase tlb flush timeout for sriov
>
> On 19.05.21 at 11:32, Chengzhe Liu wrote:
>> When there are 12 VFs, we need to increase the timeout
> NAK, 6 seconds is way too long to wait polling on a fence.
>
> Why should an invalidation take that long? The engines are per-VF
> just to avoid exactly that problem.

>
> Christian.
>
>> Signed-off-by: Chengzhe Liu 
>> ---
>>    drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +-
>>    drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 6 +-
>>    2 files changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index f02dc904e4cf..a5f005c5d0ec 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -404,6 +404,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>   uint32_t seq;
>>   uint16_t queried_pasid;
>>   bool ret;
>> +    uint32_t sriov_usec_timeout = 6000000;  /* wait for 12 * 500ms for SRIOV */
>>   struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>   struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>
>> @@ -422,7 +423,10 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>
>>   amdgpu_ring_commit(ring);
>>   spin_unlock(&adev->gfx.kiq.ring_lock);
>> -    r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>> +    if (amdgpu_sriov_vf(adev))
>> +    r = amdgpu_fence_wait_polling(ring, seq, sriov_usec_timeout);
>> +    else
>> +    r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>>   if (r < 1) {
>>   dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
>>   return -ETIME;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index ceb3968d8326..e4a18d8f75c2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -857,6 +857,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>   uint32_t seq;
>>   uint16_t queried_pasid;
>>   bool ret;
>> +    uint32_t sriov_usec_timeout = 6000000;  /* wait for 12 * 500ms for SRIOV */
>>   struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>   struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>
>> @@ -896,7 +897,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>
>>   amdgpu_ring_commit(ring);
>>   spin_unlock(&adev->gfx.kiq.ring_lock);
>> -    r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>> +    if (amdgpu_sriov_vf(adev))
>> +    r = amdgpu_fence_wait_polling(ring, seq, sriov_usec_timeout);
>> +    else
>> +    r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>>   if (r < 1) {
>>   dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
>>   up_read(&adev->reset_sem);

Re: [PATCH] drm/amdgpu: Increase tlb flush timeout for sriov

2021-05-19 Thread Chen, Horace
[AMD Official Use Only]

Hi Christian,

I think the problem is that a non-FLRed VF will not know that another VF got an
FLR, unless the host triggered a whole GPU reset.
So in the worst situation, for example if VF0 to VF10 all hang and get
FLRed one by one, VF11 will not know that any FLRs happened; from
VF11's perspective, it just sees that the fence didn't come back for about
5.5 (0.5 * 11) seconds.

Thanks & Regards,
Horace.


From: Koenig, Christian 
Sent: May 19, 2021 19:49
To: Liu, Cheng Zhe ; Christian König 
; amd-gfx@lists.freedesktop.org 

Cc: Xiao, Jack ; Xu, Feifei ; Wang, 
Kevin(Yang) ; Tuikov, Luben ; 
Deucher, Alexander ; Zhang, Hawking 
; Chen, Horace 
Subject: Re: [PATCH] drm/amdgpu: Increase tlb flush timeout for sriov

Yeah, but you can't do that, it will probably trigger the watchdog timer.

The usec_timeout is named this way because it is a usec timeout.
Anything larger than 1ms is a no-go here.

When the other instances do a FLR we don't really need to wait for the
TLB flush anyway since any FLR will kill that.

Christian.

On 19.05.21 at 13:08, Liu, Cheng Zhe wrote:
> [AMD Official Use Only]
>
> We support 12 VFs at most. In the worst case, the first 11 all fail IDLE and do an
> FLR, so it will need 11 * 500ms to switch to the 12th VF;
> that is why I set 12 * 500ms for the timeout.
>
> -Original Message-
> From: Christian König 
> Sent: Wednesday, May 19, 2021 6:08 PM
> To: Liu, Cheng Zhe ; amd-gfx@lists.freedesktop.org
> Cc: Xiao, Jack ; Xu, Feifei ; Wang, 
> Kevin(Yang) ; Tuikov, Luben ; 
> Deucher, Alexander ; Koenig, Christian 
> ; Zhang, Hawking 
> Subject: Re: [PATCH] drm/amdgpu: Increase tlb flush timeout for sriov
>
> On 19.05.21 at 11:32, Chengzhe Liu wrote:
>> When there are 12 VFs, we need to increase the timeout
> NAK, 6 seconds is way too long to wait polling on a fence.
>
> Why should an invalidation take that long? The engines are per-VF just to
> avoid exactly that problem.
>
> Christian.
>
>> Signed-off-by: Chengzhe Liu 
>> ---
>>drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +-
>>drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 6 +-
>>2 files changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index f02dc904e4cf..a5f005c5d0ec 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -404,6 +404,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
>> amdgpu_device *adev,
>>   uint32_t seq;
>>   uint16_t queried_pasid;
>>   bool ret;
>> +    uint32_t sriov_usec_timeout = 6000000;  /* wait for 12 * 500ms for SRIOV */
>>   struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>   struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>
>> @@ -422,7 +423,10 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>
>>   amdgpu_ring_commit(ring);
>>   spin_unlock(&adev->gfx.kiq.ring_lock);
>> -    r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>> +    if (amdgpu_sriov_vf(adev))
>> +        r = amdgpu_fence_wait_polling(ring, seq, sriov_usec_timeout);
>> +    else
>> +        r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>>   if (r < 1) {
>>   dev_err(adev->dev, "wait for kiq fence error: %ld.\n", 
>> r);
>>   return -ETIME;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index ceb3968d8326..e4a18d8f75c2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -857,6 +857,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct 
>> amdgpu_device *adev,
>>   uint32_t seq;
>>   uint16_t queried_pasid;
>>   bool ret;
>> +    uint32_t sriov_usec_timeout = 6000000;  /* wait for 12 * 500ms for SRIOV */
>>   struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>   struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>
>> @@ -896,7 +897,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>
>>   amdgpu_ring_commit(ring);
>>   spin_unlock(&adev->gfx.kiq.ring_lock);
>> -    r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>> +    if (amdgpu_sriov_vf(adev))
>> +        r = amdgpu_fence_wait_polling(ring, seq, sriov_usec_timeout);
>> +    else
>> +        r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>>   if (r < 1) {
>>   dev_err(adev->dev, "wait for kiq fence error: %ld.\n", 
>> r);
>>   up_read(>reset_sem);



RE: [PATCH v5 01/10] drm/amdgpu: Indirect register access for Navi12 sriov

2021-05-19 Thread Zhou, Peng Ju
[AMD Official Use Only - Internal Distribution Only]

Ping on this series.


-- 
BW
Pengju Zhou



> -Original Message-
> From: amd-gfx  On Behalf Of Zhou,
> Peng Ju
> Sent: Monday, May 17, 2021 10:50 PM
> To: Alex Deucher ; amd-gfx@lists.freedesktop.org
> Cc: Deng, Emily ; Ming, Davis
> ; Chang, HaiJun 
> Subject: RE: [PATCH v5 01/10] drm/amdgpu: Indirect register access for
> Navi12 sriov
> 
> [AMD Official Use Only - Internal Distribution Only]
> 
> Hi Alex
> About your comment:
> "I think patches 1-4, 16 need to be squashed together to avoid breaking the
> build.  Please also provide a description of how the new macros work in the
> patch description.  Describe how the reworked macros properly handle
> sending GC and MMHUB accesses via the RLC rather than via some other
> mechanism.  It's really hard to follow the macro logic."
> 
> I squashed patches 1-4, 16 and add more detail description in the patch
> description.
> Can you help to review the patch series?
> 
> 
> --
> BW
> Pengju Zhou
> 
> 
> 
> > -Original Message-
> > From: Peng Ju Zhou 
> > Sent: Monday, May 17, 2021 10:39 PM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Zhou, Peng Ju 
> > Subject: [PATCH v5 01/10] drm/amdgpu: Indirect register access for Navi12
> > sriov
> >
> > This patch series is used for GC/MMHUB(part)/IH_RB_CNTL indirect access
> > in the SRIOV environment.
> >
> > There are 4 bits, controlled by the host, which control whether
> > GC/MMHUB(part)/IH_RB_CNTL indirect access is enabled.
> > (one master bit controls the other 3 bits)
> >
> > For GC registers, change all register access from MMIO to RLC and use
> > RLC as the default access method during the full-access time.
> >
> > For partial MMHUB registers, change their access from MMIO to RLC during
> > the full-access time; the remaining registers keep the original access method.
> >
> > For the IH_RB_CNTL register, change its access from MMIO to PSP.
> >
> > Signed-off-by: Peng Ju Zhou 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h|  4 +-
> >  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 78 +--
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  9 ++-
> >  drivers/gpu/drm/amd/amdgpu/soc15_common.h  | 87 +-----
> >  6 files changed, 97 insertions(+), 84 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index 3147c1c935c8..4e0c90e52ab6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -1147,6 +1147,7 @@ int emu_soc_asic_init(struct amdgpu_device
> > *adev);
> >   * Registers read & write functions.
> >   */
> >  #define AMDGPU_REGS_NO_KIQ(1<<1)
> > +#define AMDGPU_REGS_RLC(1<<2)
> >
> >  #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg),
> > AMDGPU_REGS_NO_KIQ)  #define WREG32_NO_KIQ(reg, v)
> > amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) diff --git
> > a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 7c6c435e5d02..a2392bbe1e21 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -490,7 +490,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct
> > amdgpu_device *adev,
> > adev->gfx.rlc.funcs &&
> > adev->gfx.rlc.funcs->is_rlcg_access_range) {
> > if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
> > -   return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0);
> > +   return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0,
> > 0);
> > } else {
> > writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
> > }
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
> > index 4fc2ce8ce8ab..7a4775ab6804 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
> > @@ -127,8 +127,8 @@ struct amdgpu_rlc_funcs {
> > void (*reset)(struct amdgpu_device *adev);
> > void (*start)(struct amdgpu_device *adev);
> > void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned
> > vmid);
> > -   void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32
> > flag);
> > -   u32 (*rlcg_rreg)(struct amdgpu_device *adev, u32 offset, u32 flag);
> > +   void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32
> > acc_flags, u32 hwip);
> > +   u32 (*rlcg_rreg)(struct amdgpu_device *adev, u32 offset, u32
> > +acc_flags, u32 hwip);
> > bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t
> > reg);  };
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index 
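
The quoted series truncates here. In essence the rework routes flagged accesses through the RLC while everything else stays on MMIO. A sketch of the dispatch (the function name is illustrative and GC_HWIP as the hwip value is an assumption; the real macros add more indirection):

	/* Sketch: route a register write through the RLC when the access is
	 * flagged AMDGPU_REGS_RLC and the offset is RLC-accessible; otherwise
	 * fall back to a plain MMIO write. */
	static void wreg_sketch(struct amdgpu_device *adev, u32 reg, u32 v,
				u32 acc_flags)
	{
		if ((acc_flags & AMDGPU_REGS_RLC) &&
		    adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, acc_flags, GC_HWIP);
		else
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}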

Re: [PATCH] drm/sched: Avoid data corruptions

2021-05-19 Thread Christian König

On 19.05.21 at 16:14, Andrey Grodzovsky wrote:

Wait for all dependencies of a job to complete before
killing it to avoid data corruptions.

Signed-off-by: Andrey Grodzovsky 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/scheduler/sched_entity.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index 2e93e881b65f..d5cf61972558 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -222,11 +222,16 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
 static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
 {
 	struct drm_sched_job *job;
+	struct dma_fence *f;
 	int r;
 
 	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
 		struct drm_sched_fence *s_fence = job->s_fence;
 
+		/* Wait for all dependencies to avoid data corruptions */
+		while ((f = job->sched->ops->dependency(job, entity)))
+			dma_fence_wait(f, false);
+
 		drm_sched_fence_scheduled(s_fence);
 		dma_fence_set_error(&s_fence->finished, -ESRCH);
  




[PATCH] drm/sched: Avoid data corruptions

2021-05-19 Thread Andrey Grodzovsky
Wait for all dependencies of a job to complete before
killing it to avoid data corruptions.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/scheduler/sched_entity.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index 2e93e881b65f..d5cf61972558 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -222,11 +222,16 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
 static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
 {
 	struct drm_sched_job *job;
+	struct dma_fence *f;
 	int r;
 
 	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
 		struct drm_sched_fence *s_fence = job->s_fence;
 
+		/* Wait for all dependencies to avoid data corruptions */
+		while ((f = job->sched->ops->dependency(job, entity)))
+			dma_fence_wait(f, false);
+
 		drm_sched_fence_scheduled(s_fence);
 		dma_fence_set_error(&s_fence->finished, -ESRCH);
 
-- 
2.25.1
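
For context, a rough sketch of the kind of ->dependency() callback the new loop drains. The wrapper type and fields are illustrative, not the amdgpu implementation:

	/* Illustrative only: hand the scheduler one unsignaled fence at a
	 * time; it keeps calling back until NULL means all dependencies are
	 * met. */
	static struct dma_fence *
	sketch_dependency(struct drm_sched_job *sched_job,
			  struct drm_sched_entity *s_entity)
	{
		struct sketch_job *job = to_sketch_job(sched_job);	/* hypothetical */

		if (job->num_deps)
			return job->deps[--job->num_deps];

		return NULL;	/* ready to run, or above, ready to be killed */
	}

In amdgpu this role is played by the job's sync/dependency tracking; the sketch only shows the contract the kill path relies on.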



Re: AMDGPU error: "[drm:amdgpu_dm_atomic_commit_tail [amdgpu]] *ERROR* Waiting for fences timed out!"

2021-05-19 Thread Alex Deucher
On Wed, May 19, 2021 at 4:48 AM Michel Dänzer  wrote:
>
> On 2021-05-19 12:05 a.m., Alex Deucher wrote:
> > On Tue, May 18, 2021 at 10:11 AM Michel Dänzer  wrote:
> >>
> >> On 2021-05-17 11:33 a.m., xgqt wrote:
> >>> Hello!
> >>>
> >>> I run a AMD laptop "81NC Lenovo IdeaPad S340-15API" - AMD Ryzen 5 3500U 
> >>> with Radeon Vega 8 Graphics.
> >>> Recently some breakages started happening for me. About 1h after
> >>> boot-up, while using a KDE desktop, the machine's GUI would freeze. Sometimes it
> >>> would still be possible to move the mouse but the rest would be frozen. The screen
> >>> may start blinking or go black.
> >>>
> >>> I'm not sure if this is my kernel, firmware or the hardware.
> >>> I don't understand dmesg, that's why I'm guessing, but I think it is the 
> >>> firmware since this behavior started around 2021-05-15.
> >>> From my Portage logs I see that I updated my firmware on 2021-05-14 at 
> >>> 18:16:06.
> >>> So breakages started with my kernel: 5.10.27 and FW: 20210511.
> >>> After the breakage I jumped to an older kernel, 5.4.97, and compiled 5.12.4. I
> >>> didn't notice a breakage on 5.4.97, but the system only ran for ~40 minutes.
> >>> So I booted to newly compiled 5.12.4 where I was ~1h and it broke.
> >>> After that I booted to 5.4.97 again and downgraded my FW.
> >>> While I'm writing this I'm booted to kernel: 5.12.4 with FW: 20210315.
> >>>
> >>> I also described my situation on the Gentoo bugzilla: 
> >>> https://bugs.gentoo.org/790566
> >>>
> >>> "dmesg.log" attached here is from the time machine run fine (at the 
> >>> moment); "errors_sat_may_15_072825_pm_cest_2021.log" is a dmesg log from 
> >>> the time system broke
> >>>
> >>> Can I get any help with this? What are the next steps I should take? Any 
> >>> other files I should provide?
> >>
> >> I've hit similar hangs with a Lenovo ThinkPad E595 (Ryzen 7 3700U / 
> >> Picasso / RAVEN 0x1002:0x15D8 0x17AA:0x5124 0xC1). I'm also suspecting 
> >> them to be firmware related. The hangs occurred with firmware from the AMD 
> >> 20.50 release. I'm currently running with firmware from the 20.40 release, 
> >> no hang in almost 2 weeks (the hangs happened within 1-2 days after boot).
> >
> > Can you narrow down which firmware(s) cause the problem?
>
> I'll try, but note I'm not really sure yet my hangs were related to firmware 
> (only). Anyway, I'll try narrowing it down.

Thanks.  Does this patch help?
https://patchwork.freedesktop.org/patch/433701/

Alex


Re: New uAPI for color management proposal and feedback request

2021-05-19 Thread Ville Syrjälä
On Wed, May 19, 2021 at 12:34:05PM +0300, Pekka Paalanen wrote:
> On Wed, 12 May 2021 16:04:16 +0300
> Ville Syrjälä  wrote:
> 
> > On Wed, May 12, 2021 at 02:06:56PM +0200, Werner Sembach wrote:
> > > Hello,
> > > 
> > > In addition to the existing "max bpc", and "Broadcast RGB/output_csc" drm 
> > > properties I propose 4 new properties:
> > > "preferred pixel encoding", "active color depth", "active color range", 
> > > and "active pixel encoding"
> > > 
> > > 
> > > Motivation:
> > > 
> > > Current monitors have a variety of pixel encodings available: RGB, YCbCr 
> > > 4:4:4, YCbCr 4:2:2, YCbCr 4:2:0.
> > > 
> > > In addition they might be full or limited RGB range and the monitors 
> > > accept different bit depths.
> > > 
> > > Currently the kernel drivers for AMD and Intel GPUs
> > > configure the color settings automatically with little
> > > to no influence from the user. However, there are several real-world
> > > scenarios where the user might disagree with the
> > > default chosen by the drivers and want to set his or her own preference.
> > > 
> > > Some examples:
> > > 
> > > 1. While RGB and YCbCr 4:4:4 in theory carry the same amount of color 
> > > information, some screens might look better on one
> > > than the other because of bad internal conversion. The driver currently 
> > > however has a fixed default that is chosen if
> > > available (RGB for Intel and YCbCr 4:4:4 for AMD). The only way to change 
> > > this currently is by editing and overloading
> > > the edid reported by the monitor to the kernel.
> > > 
> > > 2. RGB and YCbCr 4:4:4 need a higher port clock then YCbCr 4:2:0. Some 
> > > hardware might report that it supports the higher
> > > port clock, but because of bad shielding on the PC, the cable, or the 
> > > monitor the screen cuts out every few seconds when
> > > RGB or YCbCr 4:4:4 encoding is used, while YCbCr 4:2:0 might just work 
> > > fine without changing hardware. The drivers
> > > currently however always default to the "best available" option even if 
> > > it might be broken.
> > > 
> > > 3. Some screens that natively only support 8-bit color simulate 10-bit
> > > color by rapidly switching between 2 adjacent
> > > colors. They advertise themselves to the kernel as 10-bit monitors but
> > > the user might not like the "fake" 10-bit effect
> > > and prefer running at the native 8 bits per color.
> > > 
> > > 4. Some screens are falsely classified as full RGB range while they 
> > > actually use limited RGB range. This results in
> > > washed out colors in dark and bright scenes. A user override can be 
> > > helpful to manually fix this issue when it occurs.
> > > 
> > > There already exist several requests, discussion, and patches regarding 
> > > the thematic:
> > > 
> > > - https://gitlab.freedesktop.org/drm/amd/-/issues/476
> > > 
> > > - https://gitlab.freedesktop.org/drm/amd/-/issues/1548
> > > 
> > > - https://lkml.org/lkml/2021/5/7/695
> > > 
> > > - https://lkml.org/lkml/2021/5/11/416
> > > 
> 
> ...
> 
> > > Adoption:
> > > 
> > > A KDE dev wants to implement the settings in the KDE settings GUI:
> > > https://gitlab.freedesktop.org/drm/amd/-/issues/476#note_912370
> > > 
> > > Tuxedo Computers (my employer) wants to implement the settings desktop 
> > > environment agnostic in Tuxedo Control Center. I
> > > will start work on this in parallel to implementing the new kernel code.  
> > 
> > I suspect everyone would be happier to accept new uapi if we had
> > multiple compositors signed up to implement it.
> 
> I think having Weston support for these would be good, but for now it
> won't be much of an UI: just weston.ini to set, and the log to see what
> happened.
> 
> However, knowing what happened is going to be important for color
> calibration auditing:
> https://gitlab.freedesktop.org/wayland/weston/-/issues/467
> 
> Yes, please, very much for read-only properties for the feedback part.
> Properties that both userspace and kernel will write are hard to deal
> with in general.
> 
> Btw. "max bpc" I can kind of guess that conversion from framebuffer
> format to the wire bpc happens automatically and only as the final
> step,

Well, there could be dithering and whatnot also involved. So it's
not super well specified atm either.

> but "Broadcast RGB" is more complicated: is the output from the
> abstract pixel pipeline sent as-is and "Broadcast RGB" is just another
> inforframe bit to the monitor, or does "Broadcast RGB" setting actually
> change what happens in the pixel pipeline *and* set infoframe bits?

It does indeed compress the actual pixel data. There was once a patch
proposed to introduce a new enum value that only sets the infoframe and
thus would allow userspace to pass through already limited range data.
Shouldn't be hard to resurrect that if needed.

> 
> My vague recollection is that framebuffer was always assumed to be in
> full range, and then if "Broadcast RGB" was set to limited range, the
> driver would mangle the pixel 
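
The message truncates here. For reference, a minimal sketch of how one of the proposed read-only properties could be attached to a connector. drm_property_create_enum() and DRM_MODE_PROP_IMMUTABLE are existing DRM APIs, but the property name and enum values merely follow the proposal; they are not an existing kernel interface:

	/* Sketch: expose "active color range" as an immutable (read-only)
	 * connector property that only the driver updates. */
	static void sketch_attach_active_color_range(struct drm_device *dev,
						     struct drm_connector *connector)
	{
		static const struct drm_prop_enum_list range_list[] = {
			{ 0, "Unknown" },
			{ 1, "Full" },
			{ 2, "Limited" },
		};
		struct drm_property *prop;

		prop = drm_property_create_enum(dev, DRM_MODE_PROP_IMMUTABLE,
						"active color range",
						range_list, ARRAY_SIZE(range_list));
		if (prop)
			drm_object_attach_property(&connector->base, prop, 0);
	}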

Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-05-19 Thread Christian König

Good point.

If you want to double check the alignment you can use something like 
"pahole drivers/gpu/drm/amd/amdgpu/amdgpu.ko -C drm_amdgpu_info_vbios" 
after building the kernel module.


Regards,
Christian.
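
(A compile-time check gives the same assurance without running pahole. A sketch, assuming the padded layout proposed below: 64 + 64 + 4 + 4 + 32 + 32 = 200 bytes, a multiple of 8.)

	/* Sketch: fail the build if the uapi struct ever stops being
	 * 64-bit aligned or changes size unexpectedly. */
	static_assert(sizeof(struct drm_amdgpu_info_vbios) == 200);
	static_assert(sizeof(struct drm_amdgpu_info_vbios) % 8 == 0);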

On 19.05.21 at 15:09, Deucher, Alexander wrote:


[Public]


The structure is not 64 bit aligned.  I think you want something like:

> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u32 pad;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> +};

*From:* Gu, JiaWei (Will) 
*Sent:* Tuesday, May 18, 2021 1:58 AM
*To:* Nieto, David M ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org 
; mar...@gmail.com ; 
Deucher, Alexander 

*Cc:* Deng, Emily 
*Subject:* RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface

[Public]


Hi all,

Then the struct looks like:

> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> +};

Sample output:

vbios name : NAVI12 A0 XT D30501 8GB EVAL 1150e/334m HYN/SAM
vbios pn : 113-D3050100-104
vbios version : 285409288
vbios ver_str : 017.003.000.008.016956
vbios date : 2021/05/03 23:32

Please help double confirm that we’re all fine with it and there’s no 
need to add & remove anything.


Best regards,

Jiawei

*From:* Nieto, David M 
*Sent:* Tuesday, May 18, 2021 12:40 PM
*To:* Gu, JiaWei (Will) ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org; 
mar...@gmail.com; Deucher, Alexander 

*Cc:* Deng, Emily 
*Subject:* Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

[Public]

Yes, let's remove that too,

Thanks,

David



*From:* Gu, JiaWei (Will) 
*Sent:* Monday, May 17, 2021 8:07 PM
*To:* Nieto, David M ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org; 
mar...@gmail.com; Deucher, Alexander 

*Cc:* Deng, Emily 
*Subject:* RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface

[AMD Official Use Only - Internal Distribution Only]

OK let’s remove serial.

dbdf comes from this:

vbios_info.dbdf = PCI_DEVID(adev->pdev->bus->number, adev->pdev->devfn);

I think we can remove dbdf as well.

Best regards,

Jiawei

*From:* Nieto, David M 
*Sent:* Tuesday, May 18, 2021 10:45 AM
*To:* Gu, JiaWei (Will) ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org; 
mar...@gmail.com; Deucher, Alexander 

*Cc:* Deng, Emily 
*Subject:* Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

[AMD Official Use Only - Internal Distribution Only]

The serial number is ASIC information, not VBIOS information, and it 
is still available as a sysfs node... I don't think we should put it 
there.


Not sure what dbdf stands for.



*From:* Gu, JiaWei (Will) 
*Sent:* Monday, May 17, 2021 7:11 PM
*To:* Koenig, Christian ; amd-gfx@lists.freedesktop.org; 
Nieto, David M ; mar...@gmail.com; 
Deucher, Alexander 

*Cc:* Deng, Emily 
*Subject:* RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface

[AMD Official Use Only - Internal Distribution Only]

So I guess the dbdf is also needed to be removed?
And how about serial?

> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u32 dbdf; // do we need this?
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> + __u64 serial; // do we need this?
> +};

Best regards,
Jiawei

-Original Message-
From: Koenig, Christian 
Sent: Monday, May 17, 2021 8:26 PM
To: Gu, JiaWei (Will) ; 
amd-gfx@lists.freedesktop.org; 
Nieto, David M ; 
mar...@gmail.com; Deucher, Alexander 

Cc: Deng, Emily 
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

I'm not very familiar with the technical background why we have the 
fields here once more.


But off hand we should at least remove everything which is also 

Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-05-19 Thread Deucher, Alexander
[Public]

The structure is not 64 bit aligned.  I think you want something like:

> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u32 pad;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> +};

From: Gu, JiaWei (Will) 
Sent: Tuesday, May 18, 2021 1:58 AM
To: Nieto, David M ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org 
; mar...@gmail.com ; Deucher, 
Alexander 
Cc: Deng, Emily 
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface


[Public]


Hi all,



Then the struct looks like:



> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> +};



Sample output:



vbios name : NAVI12 A0 XT D30501 8GB EVAL 1150e/334m HYN/SAM
vbios pn : 113-D3050100-104
vbios version : 285409288
vbios ver_str : 017.003.000.008.016956
vbios date : 2021/05/03 23:32


Please help double confirm that we’re all fine with it and there’s no need to 
add & remove anything.



Best regards,

Jiawei



From: Nieto, David M 
Sent: Tuesday, May 18, 2021 12:40 PM
To: Gu, JiaWei (Will) ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org; mar...@gmail.com; 
Deucher, Alexander 
Cc: Deng, Emily 
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[Public]



Yes, let's remove that too,



Thanks,



David



From: Gu, JiaWei (Will) 
Sent: Monday, May 17, 2021 8:07 PM
To: Nieto, David M ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org; 
mar...@gmail.com; Deucher, Alexander 

Cc: Deng, Emily 
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]



OK let’s remove serial.



dbdf comes from this:

vbios_info.dbdf = PCI_DEVID(adev->pdev->bus->number, adev->pdev->devfn);



I think we can remove dbdf as well.



Best regards,

Jiawei



From: Nieto, David M 
Sent: Tuesday, May 18, 2021 10:45 AM
To: Gu, JiaWei (Will) ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org; 
mar...@gmail.com; Deucher, Alexander 

Cc: Deng, Emily 
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]



The serial number is ASIC information, not VBIOS information, and it is still 
available as a sysfs node... I don't think we should put it there.



Not sure what dbdf stands for.



From: Gu, JiaWei (Will) 
Sent: Monday, May 17, 2021 7:11 PM
To: Koenig, Christian ; 
amd-gfx@lists.freedesktop.org; 
Nieto, David M ; 
mar...@gmail.com; Deucher, Alexander 

Cc: Deng, Emily 
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]

So I guess the dbdf is also needed to be removed?
And how about serial?

> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u32 dbdf; // do we need this?
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> + __u64 serial; // do we need this?
> +};

Best regards,
Jiawei

-Original Message-
From: Koenig, Christian 
Sent: Monday, May 17, 2021 8:26 PM
To: Gu, JiaWei (Will) ; 
amd-gfx@lists.freedesktop.org; 
Nieto, David M ; 
mar...@gmail.com; Deucher, Alexander 

Cc: Deng, Emily 
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

I'm not very familiar with the technical background why we have the fields here 
once more.

But off hand we should at least remove everything which is also available from 
the PCI information.

E.g. dev_id, rev_id, sub_dev_id, sub_ved_id.

Regards,
Christian.

Am 17.05.21 um 14:17 schrieb Gu, JiaWei (Will):
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi all,
>
> Thanks Christian's suggestion.
> I reverted the previous patches and squash them into this single one.
>
> As this patch shows, the current uapi change looks like this:
>
> +struct 

Re: [PATCH v7 13/16] drm/scheduler: Fix hang when sched_entity released

2021-05-19 Thread Christian König

On 19.05.21 at 13:51, Andrey Grodzovsky wrote:



On 2021-05-19 7:46 a.m., Christian König wrote:

On 19.05.21 at 13:03, Andrey Grodzovsky wrote:



On 2021-05-19 6:57 a.m., Christian König wrote:

On 18.05.21 at 20:48, Andrey Grodzovsky wrote:

[SNIP]


Would this be the right way to do it ?


Yes, it is at least a start. Question is if we can wait blocking 
here or not.


We install a callback a bit lower to avoid blocking, so I'm 
pretty sure that won't work as expected.


Christian.


I can't see why this would create problems; as long as the
dependencies
complete, or are force completed if they are from the same device
(extracted) but
on a different ring, then it looks to me like it should work. I will give it
a try.


Ok, but please also test the case for a killed process.

Christian.


You mean something like run glxgears and then simply
terminate it? Because I've done that. Or something more?


Well glxgears is a bit too lightweight for that.

You need at least some test which is limited by the rendering pipeline.

Christian.


You mean something that fills the entity queue faster than the sched thread
empties it, so when we kill the process we actually need to explicitly go
through the remaining jobs' termination? I've done that too, by inserting an
artificial delay in drm_sched_main.


Yeah, something like that.

Ok in that case I would say that this should work then.

Christian.



Andrey





Andrey






Andrey









Re: [PATCH v7 13/16] drm/scheduler: Fix hang when sched_entity released

2021-05-19 Thread Andrey Grodzovsky



On 2021-05-19 7:46 a.m., Christian König wrote:

On 19.05.21 at 13:03, Andrey Grodzovsky wrote:



On 2021-05-19 6:57 a.m., Christian König wrote:

On 18.05.21 at 20:48, Andrey Grodzovsky wrote:

[SNIP]


Would this be the right way to do it ?


Yes, it is at least a start. Question is if we can wait blocking 
here or not.


We install a callback a bit lower to avoid blocking, so I'm pretty 
sure that won't work as expected.


Christian.


I can't see why this would create problems; as long as the dependencies
complete, or are force completed if they are from the same device (extracted) but
on a different ring, then it looks to me like it should work. I will give it
a try.


Ok, but please also test the case for a killed process.

Christian.


You mean something like run glxgears and then simply
terminate it? Because I've done that. Or something more?


Well glxgears is a bit too lightweight for that.

You need at least some test which is limited by the rendering pipeline.

Christian.


You mean something that fills the entity queue faster than the sched thread
empties it, so when we kill the process we actually need to explicitly go
through the remaining jobs' termination? I've done that too, by inserting an
artificial delay in drm_sched_main.

Andrey





Andrey






Andrey








Re: [PATCH] drm/amdgpu: Increase tlb flush timeout for sriov

2021-05-19 Thread Christian König

Yeah, but you can't do that, it will probably trigger the watchdog timer.

The usec_timeout is named this way because it is a usec timeout.
Anything larger than 1ms is a no-go here.


When the other instances do a FLR we don't really need to wait for the 
TLB flush anyway since any FLR will kill that.


Christian.

On 19.05.21 at 13:08, Liu, Cheng Zhe wrote:

[AMD Official Use Only]

We support 12 VFs at most. In the worst case, the first 11 all fail IDLE and do an FLR,
so it will need 11 * 500ms to switch to the 12th VF;
that is why I set 12 * 500ms for the timeout.

-Original Message-
From: Christian König 
Sent: Wednesday, May 19, 2021 6:08 PM
To: Liu, Cheng Zhe ; amd-gfx@lists.freedesktop.org
Cc: Xiao, Jack ; Xu, Feifei ; Wang, Kevin(Yang) 
; Tuikov, Luben ; Deucher, Alexander 
; Koenig, Christian ; Zhang, Hawking 

Subject: Re: [PATCH] drm/amdgpu: Increase tlb flush timeout for sriov

On 19.05.21 at 11:32, Chengzhe Liu wrote:

When there are 12 VFs, we need to increase the timeout

NAK, 6 seconds is way too long to wait polling on a fence.

Why should an invalidation take that long? The engines are per-VF just to avoid
exactly that problem.

Christian.


Signed-off-by: Chengzhe Liu 
---
   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +-
   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 6 +-
   2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index f02dc904e4cf..a5f005c5d0ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -404,6 +404,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
uint32_t seq;
uint16_t queried_pasid;
bool ret;
+	uint32_t sriov_usec_timeout = 6000000;	/* wait for 12 * 500ms for SRIOV */
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
   
@@ -422,7 +423,10 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
 		amdgpu_ring_commit(ring);
 		spin_unlock(&adev->gfx.kiq.ring_lock);
-		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+		if (amdgpu_sriov_vf(adev))
+			r = amdgpu_fence_wait_polling(ring, seq, sriov_usec_timeout);
+		else
+			r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
 		if (r < 1) {
 			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
 			return -ETIME;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index ceb3968d8326..e4a18d8f75c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -857,6 +857,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
uint32_t seq;
uint16_t queried_pasid;
bool ret;
+	uint32_t sriov_usec_timeout = 6000000;	/* wait for 12 * 500ms for SRIOV */
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
   
@@ -896,7 +897,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
 		amdgpu_ring_commit(ring);
 		spin_unlock(&adev->gfx.kiq.ring_lock);
-		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+		if (amdgpu_sriov_vf(adev))
+			r = amdgpu_fence_wait_polling(ring, seq, sriov_usec_timeout);
+		else
+			r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
 		if (r < 1) {
 			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
 			up_read(&adev->reset_sem);




Re: [PATCH v7 13/16] drm/scheduler: Fix hang when sched_entity released

2021-05-19 Thread Christian König

On 19.05.21 at 13:03, Andrey Grodzovsky wrote:



On 2021-05-19 6:57 a.m., Christian König wrote:

On 18.05.21 at 20:48, Andrey Grodzovsky wrote:

[SNIP]


Would this be the right way to do it ?


Yes, it is at least a start. Question is if we can wait blocking 
here or not.


We install a callback a bit lower to avoid blocking, so I'm pretty 
sure that won't work as expected.


Christian.


I can't see why this would create problems; as long as the dependencies
complete, or are force completed if they are from the same device (extracted) but
on a different ring, then it looks to me like it should work. I will give it
a try.


Ok, but please also test the case for a killed process.

Christian.


You mean something like run glxgears and then simply
terminate it? Because I've done that. Or something more?


Well glxgears is a bit too lightweight for that.

You need at least some test which is limited by the rendering pipeline.

Christian.



Andrey






Andrey







Re: [PATCH] drm/amd/amdgpu: fix a potential deadlock in gpu reset

2021-05-19 Thread Andrey Grodzovsky



On 2021-05-17 6:55 a.m., Christian König wrote:

On 17.05.21 at 12:52, Lang Yu wrote:

When amdgpu_ib_ring_tests failed, the reset logic called
amdgpu_device_ip_suspend twice, then deadlock occurred.

Deadlock log:
[  805.655192] amdgpu :04:00.0: amdgpu: ib ring test failed (-110).
[  806.011571] [drm] Register(0) [mmUVD_POWER_STATUS] failed to reach 
value 0x0001 != 0x0002
[  806.280139] [drm] Register(0) [mmUVD_POWER_STATUS] failed to reach 
value 0x0001 != 0x0002

[  806.290952] [drm] free PSP TMR buffer

[  806.319406] 
[  806.320315] WARNING: possible recursive locking detected
[  806.321225] 5.11.0-custom #1 Tainted: G    W  OEL
[  806.322135] 
[  806.323043] cat/2593 is trying to acquire lock:
[  806.323825] 888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu]

[  806.325668]
    but task is already holding lock:
[  806.326664] 888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu]

[  806.328430]
    other info that might help us debug this:
[  806.329539]  Possible unsafe locking scenario:

[  806.330549]    CPU0
[  806.330983]    ----
[  806.331416]   lock(&adev->dm.dc_lock);
[  806.332086]   lock(&adev->dm.dc_lock);
[  806.332738]
 *** DEADLOCK ***

[  806.333747]  May be due to missing lock nesting notation

[  806.334899] 3 locks held by cat/2593:
[  806.335537]  #0: 888100d3f1b8 (&attr->mutex){+.+.}-{3:3}, at: simple_attr_read+0x4e/0x110
[  806.337009]  #1: 888136b1fd78 (&adev->reset_sem){++++}-{3:3}, at: amdgpu_device_lock_adev+0x42/0x94 [amdgpu]
[  806.339018]  #2: 888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu]

[  806.340869]
    stack backtrace:
[  806.341621] CPU: 6 PID: 2593 Comm: cat Tainted: G    W  OEL
5.11.0-custom #1
[  806.342921] Hardware name: AMD Celadon-CZN/Celadon-CZN, BIOS 
WLD0C23N_Weekly_20_12_2 12/23/2020

[  806.344413] Call Trace:
[  806.344849]  dump_stack+0x93/0xbd
[  806.345435]  __lock_acquire.cold+0x18a/0x2cf
[  806.346179]  lock_acquire+0xca/0x390
[  806.346807]  ? dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.347813]  __mutex_lock+0x9b/0x930
[  806.348454]  ? dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.349434]  ? amdgpu_device_indirect_rreg+0x58/0x70 [amdgpu]
[  806.350581]  ? _raw_spin_unlock_irqrestore+0x47/0x50
[  806.351437]  ? dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.352437]  ? rcu_read_lock_sched_held+0x4f/0x80
[  806.353252]  ? rcu_read_lock_sched_held+0x4f/0x80
[  806.354064]  mutex_lock_nested+0x1b/0x20
[  806.354747]  ? mutex_lock_nested+0x1b/0x20
[  806.355457]  dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.356427]  ? soc15_common_set_clockgating_state+0x17d/0x19 [amdgpu]
[  806.357736]  amdgpu_device_ip_suspend_phase1+0x78/0xd0 [amdgpu]
[  806.360394]  amdgpu_device_ip_suspend+0x21/0x70 [amdgpu]
[  806.362926]  amdgpu_device_pre_asic_reset+0xb3/0x270 [amdgpu]
[  806.365560]  amdgpu_device_gpu_recover.cold+0x679/0x8eb [amdgpu]
[  806.368331]  ? __pm_runtime_resume+0x60/0x80
[  806.370509]  gpu_recover_get+0x2e/0x60 [amdgpu]
[  806.372887]  simple_attr_read+0x6d/0x110
[  806.374966]  debugfs_attr_read+0x49/0x70
[  806.377046]  full_proxy_read+0x5f/0x90
[  806.379054]  vfs_read+0xa3/0x190
[  806.380969]  ksys_read+0x70/0xf0
[  806.382833]  __x64_sys_read+0x1a/0x20
[  806.384803]  do_syscall_64+0x38/0x90
[  806.386743]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  806.388946] RIP: 0033:0x7fb084ea1142
[  806.390914] Code: c0 e9 c2 fe ff ff 50 48 8d 3d 3a ca 0a 00 e8 f5 
19 02 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 
10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 
54 24
[  806.395496] RSP: 002b:7fffde50ee08 EFLAGS: 0246 ORIG_RAX: 

[  806.398298] RAX: ffda RBX: 0002 RCX: 
7fb084ea1142
[  806.401063] RDX: 0002 RSI: 7fb0844ff000 RDI: 
0003
[  806.403793] RBP: 7fb0844ff000 R08: 7fb0844fe010 R09: 

[  806.406516] R10: 0022 R11: 0246 R12: 
555d3d3b51f0
[  806.409246] R13: 0003 R14: 0002 R15: 
0002


I think we should shorten the backtrace here a bit.



Signed-off-by: Lang Yu 


Looks sane to me, but Andrey should probably also take a look.

Acked-by: Christian König 


Yes, seems like a typo...

Reviewed-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>

Andrey





---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
  1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7c6c435e5d02..ff341154394e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4476,7 +4476,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,

 			r = amdgpu_ib_ring_tests(tmp_adev);

Re: [PATCH] drm/amdgpu: disable 3DCGCG on picasso/raven1 to avoid compute hang

2021-05-19 Thread Nirmoy


On 5/14/21 4:13 PM, Alex Deucher wrote:

On Fri, May 14, 2021 at 4:20 AM  wrote:

From: changzhu 

From: Changfeng 

There is problem with 3DCGCG firmware and it will cause compute test
hang on picasso/raven1. It needs to disable 3DCGCG in driver to avoid
compute hang.

Change-Id: Ic7d3c7922b2b32f7ac5193d6a4869cbc5b3baa87
Signed-off-by: Changfeng 

Reviewed-by: Alex Deucher 

With this applied, can we re-enable the additional compute queues?



I didn't push that change as I was supposed to do more tests with KFD and

I probably got distracted by some other activity. Sorry for causing this 
confusion!



Acked-by: Nirmoy Das 


Regards,

Nirmoy




Alex


---
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +++---
  drivers/gpu/drm/amd/amdgpu/soc15.c|  2 --
  2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 22608c45f07c..feaa5e4a5538 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4947,7 +4947,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
 	amdgpu_gfx_rlc_enter_safe_mode(adev);
 
 	/* Enable 3D CGCG/CGLS */
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+	if (enable) {
 		/* write cmd to clear cgcg/cgls ov */
 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		/* unset CGCG override */
@@ -4959,8 +4959,12 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
 		/* enable 3Dcgcg FSM(0x0000363f) */
 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
 
-		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
-			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+		else
+			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
+
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 4b660b2d1c22..080e715799d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1393,7 +1393,6 @@ static int soc15_common_early_init(void *handle)
 		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
 			AMD_CG_SUPPORT_GFX_MGLS |
 			AMD_CG_SUPPORT_GFX_CP_LS |
-			AMD_CG_SUPPORT_GFX_3D_CGCG |
 			AMD_CG_SUPPORT_GFX_3D_CGLS |
 			AMD_CG_SUPPORT_GFX_CGCG |
 			AMD_CG_SUPPORT_GFX_CGLS |
@@ -1413,7 +1412,6 @@ static int soc15_common_early_init(void *handle)
 			AMD_CG_SUPPORT_GFX_MGLS |
 			AMD_CG_SUPPORT_GFX_RLC_LS |
 			AMD_CG_SUPPORT_GFX_CP_LS |
-			AMD_CG_SUPPORT_GFX_3D_CGCG |
 			AMD_CG_SUPPORT_GFX_3D_CGLS |
 			AMD_CG_SUPPORT_GFX_CGCG |
 			AMD_CG_SUPPORT_GFX_CGLS |
--
2.17.1


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: disable 3DCGCG on picasso/raven1 to avoid compute hang

2021-05-19 Thread Nirmoy


On 5/19/21 5:14 AM, Huang, Ray wrote:


[Public]

I checked, and the patch (below) to disable compute queues for raven has 
not landed in drm-next. So actually all queues are enabled at the 
moment. Nirmoy, can we get your confirmation?




I indeed didn't push the commit that disables all but one compute queue 
for raven. I was supposed to check with KFD, as Felix wanted to

know if that bug affects KFD. I think I got distracted by something else.


Regards,

Nirmoy

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 97a8f786cf85..9352fcb77fe9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -812,6 +812,13 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 
 int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
 {
 	if (amdgpu_num_kcq == -1) {
+		/* raven firmware currently can not load balance jobs
+		 * among multiple compute queues. Enable only one
+		 * compute queue till we have a firmware fix.
+		 */
+		if (adev->asic_type == CHIP_RAVEN)
+			return 1;
+
 		return 8;
 	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
 		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");


And I am glad to see that we now have a solution for this issue. Nice 
work, Changfeng!


Best Regards,

Ray

From: Deucher, Alexander
Sent: Wednesday, May 19, 2021 11:04 AM
To: Chen, Guchun; Zhu, Changfeng; Alex Deucher; Das, Nirmoy
Cc: Huang, Ray; amd-gfx list
Subject: Re: [PATCH] drm/amdgpu: disable 3DCGCG on picasso/raven1 to avoid compute hang


[Public]

I thought we had disabled all but one of the compute queues on raven 
due to this issue or at least disabled the schedulers for the 
additional queues, but maybe I'm misremembering.


Alex



From: Chen, Guchun <guchun.c...@amd.com>
Sent: Tuesday, May 18, 2021 11:00 PM
To: Zhu, Changfeng; Deucher, Alexander <alexander.deuc...@amd.com>; Alex Deucher <alexdeuc...@gmail.com>; Das, Nirmoy
Cc: Huang, Ray <ray.hu...@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH] drm/amdgpu: disable 3DCGCG on picasso/raven1 to avoid compute hang


[Public]

Nirmoy’s patch landed already if I understand correctly.

d41a39dda140 drm/scheduler: improve job distribution with multiple queues

Regards,

Guchun

From: amd-gfx On Behalf Of Zhu, Changfeng
Sent: Wednesday, May 19, 2021 10:56 AM
To: Deucher, Alexander; Alex Deucher <alexdeuc...@gmail.com>; Das, Nirmoy
Cc: Huang, Ray <ray.hu...@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH] drm/amdgpu: disable 3DCGCG on picasso/raven1 to avoid compute hang


[Public]

[Public]

Hi Alex,

This is the issue exposed by Nirmoy's patch that provided better load 
balancing across queues.


BR,

Changfeng.

From: Deucher, Alexander
Sent: Wednesday, May 19, 2021 10:53 AM
To: Zhu, Changfeng; Alex Deucher; Das, Nirmoy
Cc: Huang, Ray <ray.hu...@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH] drm/amdgpu: disable 3DCGCG on picasso/raven1 to avoid compute hang


[Public]

+ Nirmoy

I thought we disabled all but one of the compute queues on raven due 
to this issue. Maybe that patch never landed?  Wasn't this the same 
issue that was exposed by Nirmoy's patch that provided better load 
balancing across queues?


Alex



From: amd-gfx on behalf of Zhu, Changfeng
Sent: Tuesday, May 18, 2021 10:28 PM
To: Alex Deucher <alexdeuc...@gmail.com>
Cc: Huang, Ray <ray.hu...@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH] drm/amdgpu: disable 3DCGCG on picasso/raven1 to avoid compute hang


[AMD Official Use Only - Internal Distribution Only]

Hi Alex.

I have submitted the patch: drm/amdgpu: disable 3DCGCG on 
picasso/raven1 to avoid compute hang


Do you mean we have something else to do for re-enabling the extra 
compute queues?


BR,
Changfeng.

-----Original Message-----
From: Alex Deucher <alexdeuc...@gmail.com>
Sent: Wednesday, May 19, 2021 10:20 AM
To: Zhu, Changfeng
Cc: Huang, Ray <ray.hu...@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH] drm/amdgpu: disable 3DCGCG on picasso/raven1 to avoid compute hang



RE: [PATCH] drm/amdgpu: Increase tlb flush timeout for sriov

2021-05-19 Thread Liu, Cheng Zhe
[AMD Official Use Only]

We support 12 VFs at most. In the worst case, the first 11 all fail the IDLE 
request and do an FLR, so it will need 11 * 500ms to switch to the 12th VF; 
that is why I set 12 * 500ms for the timeout.
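For reference, that budget works out as follows (a sketch; the macro names are invented for illustration, not from the patch):

#include <stdint.h>

/* Worst case from the discussion above: up to 12 VFs, and a world
 * switch can take 500 ms per VF, so polling must allow 12 * 500 ms.
 * amdgpu_fence_wait_polling() takes its timeout in microseconds.
 */
#define SRIOV_MAX_VF	12
#define VF_SWITCH_MS	500

static const uint32_t sriov_usec_timeout =
	SRIOV_MAX_VF * VF_SWITCH_MS * 1000;	/* 6,000,000 us = 6 s */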

-----Original Message-----
From: Christian König
Sent: Wednesday, May 19, 2021 6:08 PM
To: Liu, Cheng Zhe; amd-gfx@lists.freedesktop.org
Cc: Xiao, Jack; Xu, Feifei; Wang, Kevin(Yang); Tuikov, Luben; Deucher, Alexander; Koenig, Christian; Zhang, Hawking
Subject: Re: [PATCH] drm/amdgpu: Increase tlb flush timeout for sriov

On 19.05.21 at 11:32, Chengzhe Liu wrote:
> When there are 12 VFs, we need to increase the timeout

NAK, 6 seconds is way too long to wait polling on a fence.

Why should an invalidation take that long? The engines are per VF just to avoid 
exactly that problem.

Christian.

>
> Signed-off-by: Chengzhe Liu 
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 6 +-
>   2 files changed, 10 insertions(+), 2 deletions(-)
>
> [SNIP]
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v7 13/16] drm/scheduler: Fix hang when sched_entity released

2021-05-19 Thread Andrey Grodzovsky



On 2021-05-19 6:57 a.m., Christian König wrote:

On 18.05.21 at 20:48, Andrey Grodzovsky wrote:

[SNIP]


Would this be the right way to do it ?


Yes, it is at least a start. Question is if we can wait blocking here 
or not.


We install a callback a bit lower to avoid blocking, so I'm pretty 
sure that won't work as expected.


Christian.


I can't see why this would create problems. As long as the dependencies
complete, or are force-completed if they are from the same device
(extracted) but on a different ring, then it looks to me like it should
work. I will give it a try.


Ok, but please also test the case for a killed process.

Christian.


You mean something like running glxgears and then simply
terminating it? Because I've done that. Or something more?

Andrey



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID

2021-05-19 Thread Christian König

Well, I don't think generating a UUID in the kernel makes sense in general.

What we can do is expose the serial number of the device, so that 
userspace can create a UUID if necessary.


Christian.
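As an illustration of that suggestion, userspace could derive a stable RFC 4122-style UUID from a serial number itself; a sketch (the sysfs path and the hash-based folding are assumptions for illustration, not an existing ABI):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

int main(void)
{
	char serial[64] = {0};
	unsigned char u[16] = {0};
	uint64_t h = 1469598103934665603ULL; /* FNV-1a offset basis */
	/* Hypothetical sysfs attribute; adjust to whatever the driver exposes. */
	FILE *f = fopen("/sys/class/drm/card0/device/serial_number", "r");

	if (!f)
		return 1;
	if (!fgets(serial, sizeof(serial), f)) {
		fclose(f);
		return 1;
	}
	fclose(f);

	/* Fold the serial string into 16 bytes (FNV-1a style mixing). */
	for (size_t i = 0; serial[i] && serial[i] != '\n'; i++) {
		h = (h ^ (unsigned char)serial[i]) * 1099511628211ULL; /* FNV prime */
		u[i % 16] ^= (unsigned char)(h >> (8 * (i % 8)));
	}

	u[6] = (u[6] & 0x0f) | 0x40; /* version bits */
	u[8] = (u[8] & 0x3f) | 0x80; /* RFC 4122 variant bits */

	printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n",
	       u[0], u[1], u[2], u[3], u[4], u[5], u[6], u[7], u[8], u[9],
	       u[10], u[11], u[12], u[13], u[14], u[15]);
	return 0;
}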

On 18.05.21 at 22:37, Nieto, David M wrote:


[AMD Official Use Only]


I think the sysfs node should be moved into amdgpu_pm instead of 
amdgpu_device.c, and generation of the unique_id should be moved to 
navi10_ppt.c, similar to other chips.


Thinking about it more, generating a random UUID makes no sense at the 
driver level; any application can do the same thing in userspace if 
the UUID sysfs node is empty.


So I think we should do the same as with the unique_id node: if the 
unique_id is not present, just return.


David

From: Alex Deucher
Sent: Tuesday, May 18, 2021 7:12 AM
To: Gu, JiaWei (Will)
Cc: amd-gfx list; Deng, Emily; Nieto, David M
Subject: Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID
On Mon, May 17, 2021 at 1:54 AM Jiawei Gu  wrote:
>
> Introduce an RFC 4122 compliant UUID for the GPUs, derived
> from the unique GPU serial number (from Vega10 onward).
> Where this serial number is not available, use a compliant
> random UUID.
>
> For virtualization, the unique ID is passed by the host driver
> in the PF2VF structure.
>
> Signed-off-by: Jiawei Gu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 36 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 96 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    |  4 +
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +-
>  drivers/gpu/drm/amd/amdgpu/nv.c |  5 ++
>  drivers/gpu/drm/amd/amdgpu/nv.h |  3 +
>  6 files changed, 146 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 3147c1c935c8..ad6d4b55be6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -802,6 +802,40 @@ struct amd_powerplay {
>   (rid == 0x01) || \
>   (rid == 0x10
>
> +union amdgpu_uuid_info {
> +   struct {
> +   union {
> +   struct {
> +   uint32_t did    : 16;
> +   uint32_t fcn    : 8;
> +   uint32_t asic_7 : 8;
> +   };
> +   uint32_t time_low;
> +   };
> +
> +   struct {
> +   uint32_t time_mid  : 16;
> +   uint32_t time_high : 12;
> +   uint32_t version   : 4;
> +   };
> +
> +   struct {
> +   struct {
> +   uint8_t clk_seq_hi : 6;
> +   uint8_t variant : 2;
> +   };
> +   union {
> +   uint8_t clk_seq_low;
> +   uint8_t asic_6;
> +   };
> +   uint16_t asic_4;
> +   };
> +
> +   uint32_t asic_0;
> +   };
> +   char as_char[16];
> +};
> +
>  #define AMDGPU_RESET_MAGIC_NUM 64
>  #define AMDGPU_MAX_DF_PERFMONS 4
>  struct amdgpu_device {
> @@ -1074,6 +1108,8 @@ struct amdgpu_device {
> char product_name[32];
> char    serial[20];
>
> +   union amdgpu_uuid_info uuid_info;
> +
> struct amdgpu_autodump  autodump;
>
> atomic_t throttling_logging_enabled;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 7c6c435e5d02..079841e1cb52 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -37,6 +37,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "amdgpu.h"
>  #include "amdgpu_trace.h"
>  #include "amdgpu_i2c.h"
> @@ -3239,11 +3240,104 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
> return ret;
>  }
>
> +static bool amdgpu_is_uuid_info_empty(union amdgpu_uuid_info *uuid_info)
> +{
> +   return (uuid_info->time_low    == 0 &&
> +   uuid_info->time_mid == 0 &&
> +   uuid_info->time_high == 0 &&
> +   uuid_info->version == 0 &&
> +   uuid_info->clk_seq_hi == 0 &&
> +   uuid_info->variant == 0 &&
> +   uuid_info->clk_seq_low == 0 &&
> +   uuid_info->asic_4 == 0 &&
> +   uuid_info->asic_0 == 0);
> +}
> +
> +static void amdgpu_gen_uuid_info(union amdgpu_uuid_info *uuid_info,
> +   uint64_t serial, uint16_t did, uint8_t idx)
> +{
> +   uint16_t clk_seq = 0;
> +
> +   /* 

Re: [PATCH v7 13/16] drm/scheduler: Fix hang when sched_entity released

2021-05-19 Thread Christian König

On 18.05.21 at 20:48, Andrey Grodzovsky wrote:

[SNIP]


Would this be the right way to do it ?


Yes, it is at least a start. Question is if we can wait blocking here 
or not.


We install a callback a bit lower to avoid blocking, so I'm pretty 
sure that won't work as expected.


Christian.


I can't see why this would create problems. As long as the dependencies
complete, or are force-completed if they are from the same device
(extracted) but on a different ring, then it looks to me like it should
work. I will give it a try.


Ok, but please also test the case for a killed process.

Christian.



Andrey


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: modify system reference clock source for navi+ (V2)

2021-05-19 Thread Huang, Ray
[AMD Official Use Only]

Reviewed-by: Huang Rui 

-Original Message-
From: Liu, Aaron  
Sent: Tuesday, May 18, 2021 10:16 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Huang, Ray 
; Wang, Kevin(Yang) ; Liu, Aaron 

Subject: [PATCH] drm/amdgpu: modify system reference clock source for navi+ (V2)

Starting from Navi, the rlc reference clock from the vbios gfx_info table is 
used as the system clock. It is incorrect to use core_refclk_10khz from the 
vbios smu_info table as the system clock.

Signed-off-by: Aaron Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 8c417014ca89..3b5d13189073 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -546,6 +546,21 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
 		ret = 0;
 	}
 
+	/* if asic is Navi+, the rlc reference clock is used for system clock
+	 * from vbios gfx_info table */
+	if (adev->asic_type >= CHIP_NAVI10) {
+		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+						    gfx_info);
+		if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+						  &frev, &crev, &data_offset)) {
+			struct atom_gfx_info_v2_2 *gfx_info = (struct atom_gfx_info_v2_2*)
+				(mode_info->atom_context->bios + data_offset);
+			if ((frev == 2) && (crev >= 2))
+				spll->reference_freq = le32_to_cpu(gfx_info->rlc_gpu_timer_refclk);
+			ret = 0;
+		}
+	}
+
 	return ret;
 }
 
--
2.25.1
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Increase tlb flush timeout for sriov

2021-05-19 Thread Christian König

On 19.05.21 at 11:32, Chengzhe Liu wrote:

When there are 12 VFs, we need to increase the timeout


NAK, 6 seconds is way too long to wait polling on a fence.

Why should an invalidation take that long? The engines are per VF just to 
avoid exactly that problem.


Christian.



Signed-off-by: Chengzhe Liu 
---
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 6 +-
  2 files changed, 10 insertions(+), 2 deletions(-)

[SNIP]


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs

2021-05-19 Thread Christian König

On 19.05.21 at 07:45, Felix Kuehling wrote:

SG BOs such as dmabuf imports and userptr BOs do not consume system
resources directly. Instead they point to resources owned elsewhere.
They typically get evicted by DMABuf move notifiers or MMU notifiers.
If those notifiers don't need to wait for hardware fences (i.e. the SG
BOs are used in a preemptible context), then we don't need to limit
them to the GTT size and we don't need TTM to evict them.

Create a new domain for such preemptible SG BOs that does not impose
artificial size limits and TTM evictions.


Please don't create a GEM domain for this. That has just too much 
potential to be abused by userspace.


The kernel is the only place where we can decide if the BO is 
preemptible or not.


Christian.



Signed-off-by: Felix Kuehling 
---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   7 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |   4 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c|   8 +
  .../gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c   | 190 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  37 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  11 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   3 +-
  include/uapi/drm/amdgpu_drm.h |   7 +-
  8 files changed, 258 insertions(+), 9 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 6331a11299d0..6cf0fe871d6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -51,9 +51,10 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-   amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-   amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
-	amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
+   amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
+   amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
+   amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \
+   amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 311bcdc59eda..280cc0c0a9b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -246,6 +246,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
return -EINVAL;
  
+	/* preemptible domain not supported by current CS API */
+	if (args->in.domains & AMDGPU_GEM_DOMAIN_PREEMPT)
+   return -EINVAL;
+
if (!amdgpu_is_tmz(adev) && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) {
 		DRM_NOTE_ONCE("Cannot allocate secure buffer since TMZ is disabled\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 745fcf3ea450..5b538e746afa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -194,6 +194,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
c++;
}
  
+	if (domain & AMDGPU_GEM_DOMAIN_PREEMPT) {
+		places[c].fpfn = 0;
+   places[c].lpfn = 0;
+   places[c].mem_type = AMDGPU_PL_PREEMPT;
+   places[c].flags = 0;
+   c++;
+   }
+
if (!c) {
places[c].fpfn = 0;
places[c].lpfn = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
new file mode 100644
index ..b4185dc3c394
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright 2016-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE 

Re: [RFC PATCH 0/3] A drm_plane API to support HDR planes

2021-05-19 Thread Pekka Paalanen
On Wed, 19 May 2021 11:53:37 +0300
Pekka Paalanen  wrote:

...

> TL;DR:
> 
> I would summarise my comments so far into these:
> 
> - Telling the kernel the color spaces and letting it come up with
>   whatever color transformation formula from those is not enough,
>   because it puts the render intent policy decision in the kernel.
> 
> - Telling the kernel what color transformations need to be done is
>   good, if it is clearly defined.
> 
> - Using an enum-based UAPI to tell the kernel what color
>   transformations needs to be done (e.g. which EOTF or EOTF^-1 to apply
>   at a step in the abstract pipeline) is very likely ok for many
>   Wayland compositors in most cases, but may not be sufficient for all
>   use cases. Of course, one is always bound by what hardware can do, so
>   not a big deal.
> 
> - You may need to define mutually exclusive KMS properties (referring
>   to my email in another branch of this email tree).
> 
> - I'm not sure I (we?) can meaningfully review things like "SDR boost"
>   property until we know ourselves how to composite different types of
>   content together. Maybe someone else could.
> 
> Does this help or raise thoughts?
> 
> The work on Weston CM right now is aiming to get it up to a point
> where we can start nicely testing different compositing approaches and
> methods and parameters, and I expect that will also feed back into the
> Wayland CM protocol design as well.

I have forgot to mention one important thing:

Generic Wayland compositors will be using KMS planes opportunistically.
The compositor will be switching between GL and KMS compositing
on-demand, refresh by refresh. This means that both GL and KMS
compositing must produce identical results, or users will be seeing
"color flicks" on switch.

This is a practical reason why we really want to know in full detail
how the KMS pipeline processes pixels.


Thanks,
pq


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to swapout and swapin

2021-05-19 Thread Christian König

I'm scratching my head how that is even possible.

See, when a BO is created in the system domain it is just an empty hull, 
i.e. without backing store and allocated pages.

So the swapout function will just ignore it.

Christian.

On 19.05.21 at 07:07, Pan, Xinhui wrote:

[AMD Official Use Only]

I have reverted Christian's patch and still hit this failure.
Just look at these two lines in Christian's patch: any BO in the CPU domain 
would be swapped out first. That is why we hit this issue frequently now. But 
the bug has been there for a long time.

-   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
-   list_for_each_entry(bo, >swap_lru[i], swap) {
[snip]
+   for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
+   for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {



From: Pan, Xinhui
Sent: May 19, 2021 12:09
To: Kuehling, Felix; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander; Koenig, Christian; dri-de...@lists.freedesktop.org; dan...@ffwll.ch
Subject: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to swapout and swapin

Yes, we really don't swap out SG BOs.
The problem is that before we validate a userptr BO, we create this BO in the 
CPU domain by default. So this BO has a chance to be swapped out.

We set the flag TTM_PAGE_FLAG_SG on the userptr BO in populate(), which is too 
late. I have not tried to revert Christian's patch as I think it doesn't help. 
But I can have a try later.


From: Kuehling, Felix
Sent: May 19, 2021 11:29
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander; Koenig, Christian; dri-de...@lists.freedesktop.org; dan...@ffwll.ch
Subject: Re: [RFC PATCH 1/2] drm/amdgpu: Fix memory corruption due to swapout and swapin

Swapping SG BOs makes no sense, because TTM doesn't own the pages of
this type of BO.

Last I checked, userptr BOs (and other SG BOs) were protected from
swapout by the fact that they would not be added to the swap-LRU. But it
looks like Christian just removed the swap-LRU. I guess this broke that
protection:

commit 2cb51d22d70b18eaf339abf9758bf0b7608da65c
Author: Christian König 
Date:   Tue Oct 6 16:30:09 2020 +0200

  drm/ttm: remove swap LRU v3

  Instead evict round robin from each devices SYSTEM and TT domain.

  v2: reorder num_pages access reported by Dan's script
  v3: fix rebase fallout, num_pages should be 32bit

  Signed-off-by: Christian König 
  Tested-by: Nirmoy Das 
  Reviewed-by: Huang Rui 
  Reviewed-by: Matthew Auld 
  Link: https://patchwork.freedesktop.org/patch/424009/

Regards,
Felix


On 2021-05-18 10:28 p.m., xinhui pan wrote:

CPU 1                                     CPU 2
kfd alloc BO A (userptr)                  alloc BO B (GTT)
  -> init -> validate                     -> init -> validate -> populate
  -> init_user_pages                      -> swapout BO A  // hit ttm pages limit
    -> get_user_pages (fill up ttm->pages)
                                          -> validate -> populate
                                          -> swapin BO A  // Now hit the BUG

We know that get_user_pages may race with swapout on the same BO.
There are some issues I have met.
1) Memory corruption.
This is because we do a swap before memory is set up. ttm_tt_swapout()
just creates a swap_storage with its content being 0x0. So when we set up
memory after the swapout, the following swapin makes the memory
corrupted.

2) Panic.
When swapout happens concurrently with get_user_pages, they touch
ttm->pages without any lock. It causes memory corruption too. But I hit a
page fault mostly.

Signed-off-by: xinhui pan 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 +++-
   1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 928e8d57cd08..42460e4480f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -835,6 +835,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
   struct amdkfd_process_info *process_info = mem->process_info;
   struct amdgpu_bo *bo = mem->bo;
   struct ttm_operation_ctx ctx = { true, false };
+ struct page **pages;
   int ret = 0;

    mutex_lock(&process_info->lock);
@@ -852,7 +853,13 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
   goto out;
   }

- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
+ pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!pages)
+ goto unregister_out;
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo, pages);
   if (ret) {
   pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
   goto unregister_out;
@@ -863,6 +870,12 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
   pr_err("%s: Failed 

Re: New uAPI for color management proposal and feedback request

2021-05-19 Thread Pekka Paalanen
On Wed, 12 May 2021 16:04:16 +0300
Ville Syrjälä  wrote:

> On Wed, May 12, 2021 at 02:06:56PM +0200, Werner Sembach wrote:
> > Hello,
> > 
> > In addition to the existing "max bpc", and "Broadcast RGB/output_csc" drm 
> > properties I propose 4 new properties:
> > "preferred pixel encoding", "active color depth", "active color range", and 
> > "active pixel encoding"
> > 
> > 
> > Motivation:
> > 
> > Current monitors have a variety pixel encodings available: RGB, YCbCr 
> > 4:4:4, YCbCr 4:2:2, YCbCr 4:2:0.
> > 
> > In addition they might be full or limited RGB range and the monitors accept 
> > different bit depths.
> > 
> > Currently the kernel driver for AMD and Intel GPUs automatically configure 
> > the color settings automatically with little
> > to no influence of the user. However there are several real world scenarios 
> > where the user might disagree with the
> > default chosen by the drivers and wants to set his or her own preference.
> > 
> > Some examples:
> > 
> > 1. While RGB and YCbCr 4:4:4 in theory carry the same amount of color 
> > information, some screens might look better on one
> > than the other because of bad internal conversion. The driver currently 
> > however has a fixed default that is chosen if
> > available (RGB for Intel and YCbCr 4:4:4 for AMD). The only way to change 
> > this currently is by editing and overloading
> > the edid reported by the monitor to the kernel.
> > 
> > 2. RGB and YCbCr 4:4:4 need a higher port clock then YCbCr 4:2:0. Some 
> > hardware might report that it supports the higher
> > port clock, but because of bad shielding on the PC, the cable, or the 
> > monitor the screen cuts out every few seconds when
> > RGB or YCbCr 4:4:4 encoding is used, while YCbCr 4:2:0 might just work fine 
> > without changing hardware. The drivers
> > currently however always default to the "best available" option even if it 
> > might be broken.
> > 
> > 3. Some screens natively only supporting 8-bit color, simulate 10-Bit color 
> > by rapidly switching between 2 adjacent
> > colors. They advertise themselves to the kernel as 10-bit monitors but the 
> > user might not like the "fake" 10-bit effect
> > and prefer running at the native 8-bit per color.
> > 
> > 4. Some screens are falsely classified as full RGB range wile they actually 
> > use limited RGB range. This results in
> > washed out colors in dark and bright scenes. A user override can be helpful 
> > to manually fix this issue when it occurs.
> > 
> > There already exist several requests, discussion, and patches regarding the 
> > thematic:
> > 
> > - https://gitlab.freedesktop.org/drm/amd/-/issues/476
> > 
> > - https://gitlab.freedesktop.org/drm/amd/-/issues/1548
> > 
> > - https://lkml.org/lkml/2021/5/7/695
> > 
> > - https://lkml.org/lkml/2021/5/11/416
> > 

...

> > Adoption:
> > 
> > A KDE dev wants to implement the settings in the KDE settings GUI:
> > https://gitlab.freedesktop.org/drm/amd/-/issues/476#note_912370
> > 
> > Tuxedo Computers (my employer) wants to implement the settings desktop 
> > environment agnostic in Tuxedo Control Center. I
> > will start work on this in parallel to implementing the new kernel code.  
> 
> I suspect everyone would be happier to accept new uapi if we had
> multiple compositors signed up to implement it.

I think having Weston support for these would be good, but for now it
won't be much of an UI: just weston.ini to set, and the log to see what
happened.

However, knowing what happened is going to be important for color
calibration auditing:
https://gitlab.freedesktop.org/wayland/weston/-/issues/467

Yes, please, very much for read-only properties for the feedback part.
Properties that both userspace and kernel will write are hard to deal
with in general.

Btw. "max bpc" I can kind of guess that conversion from framebuffer
format to the wire bpc happens automatically and only as the final
step, but "Broadcast RGB" is more complicated: is the output from the
abstract pixel pipeline sent as-is and "Broadcast RGB" is just another
inforframe bit to the monitor, or does "Broadcast RGB" setting actually
change what happens in the pixel pipeline *and* set infoframe bits?

My vague recollection is that framebuffer was always assumed to be in
full range, and then if "Broadcast RGB" was set to limited range, the
driver would mangle the pixel pipeline to convert from full to limited
range. This means that it would be impossible to have limited range
data in a framebuffer, or there might be a double-conversion by
userspace programming a LUT for limited->full and then the driver
adding full->limited. I'm also confused how full/limited works when
framebuffer is in RGB/YCbCr and the monitor wire format is in RGB/YCbCr
and there may be RGB->YCbCR or YCbCR->RGB conversions going on - or
maybe even FB YCbCR -> RGB -> DEGAMMA -> CTM -> GAMMA -> YCbCR.

I wish someone drew a picture of the KMS abstract pixel pipeline with
all the existing KMS properties in it. :-)


Thanks,
pq
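For concreteness, the full-range to limited-range ("Broadcast RGB") conversion discussed above is simple per-channel arithmetic; a hedged 8-bit sketch, not any driver's actual code:

#include <stdint.h>

/* Map an 8-bit full-range value [0, 255] to limited/broadcast range
 * [16, 235], rounding to nearest; the inverse expands [16, 235] back
 * to [0, 255]. Applying both back to back is the double-conversion
 * risk mentioned above.
 */
static inline uint8_t full_to_limited(uint8_t v)
{
	return (uint8_t)(16 + (v * 219 + 127) / 255);
}

static inline uint8_t limited_to_full(uint8_t v)
{
	int x = ((int)v - 16) * 255;

	if (x < 0)
		x = 0;			/* clamp blacker-than-black */
	x = (x + 109) / 219;
	return (uint8_t)(x > 255 ? 255 : x);	/* clamp whiter-than-white */
}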



[PATCH] drm/amdgpu: Increase tlb flush timeout for sriov

2021-05-19 Thread Chengzhe Liu
When there are 12 VFs, we need to increase the timeout

Signed-off-by: Chengzhe Liu 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 6 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index f02dc904e4cf..a5f005c5d0ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -404,6 +404,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	uint32_t seq;
 	uint16_t queried_pasid;
 	bool ret;
+	uint32_t sriov_usec_timeout = 6000000;  /* wait for 12 * 500ms for SRIOV */
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
@@ -422,7 +423,10 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
 	amdgpu_ring_commit(ring);
 	spin_unlock(&adev->gfx.kiq.ring_lock);
-	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+	if (amdgpu_sriov_vf(adev))
+		r = amdgpu_fence_wait_polling(ring, seq, sriov_usec_timeout);
+	else
+		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
 	if (r < 1) {
 		dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
 		return -ETIME;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index ceb3968d8326..e4a18d8f75c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -857,6 +857,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	uint32_t seq;
 	uint16_t queried_pasid;
 	bool ret;
+	uint32_t sriov_usec_timeout = 6000000;  /* wait for 12 * 500ms for SRIOV */
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
@@ -896,7 +897,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
 	amdgpu_ring_commit(ring);
 	spin_unlock(&adev->gfx.kiq.ring_lock);
-	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+	if (amdgpu_sriov_vf(adev))
+		r = amdgpu_fence_wait_polling(ring, seq, sriov_usec_timeout);
+	else
+		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
 	if (r < 1) {
 		dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
 		up_read(&adev->reset_sem);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Disable cwsr for vega10 and Sienna_Cichlid in sriov

2021-05-19 Thread Chengzhe Liu
In SRIOV, CWSR is not stable

Signed-off-by: Chengzhe Liu 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 80015e866498..89bd0059329b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -335,7 +335,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
.event_interrupt_class = _interrupt_class_v9,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
-   .supports_cwsr = true,
+   .supports_cwsr = false,
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
@@ -505,6 +505,24 @@ static const struct kfd_device_info sienna_cichlid_device_info = {
.num_sdma_queues_per_engine = 8,
 };
 
+static const struct kfd_device_info sienna_cichlid_vf_device_info = {
+   .asic_family = CHIP_SIENNA_CICHLID,
+   .asic_name = "sienna_cichlid",
+   .max_pasid_bits = 16,
+   .max_no_of_hqd  = 24,
+   .doorbell_size  = 8,
+   .ih_ring_entry_size = 8 * sizeof(uint32_t),
+   .event_interrupt_class = _interrupt_class_v10,
+   .num_of_watch_points = 4,
+   .mqd_size_aligned = MQD_SIZE_ALIGNED,
+   .needs_iommu_device = false,
+   .supports_cwsr = false,
+   .needs_pci_atomics = true,
+   .num_sdma_engines = 4,
+   .num_xgmi_sdma_engines = 0,
+   .num_sdma_queues_per_engine = 8,
+};
+
 static const struct kfd_device_info navy_flounder_device_info = {
.asic_family = CHIP_NAVY_FLOUNDER,
.asic_name = "navy_flounder",
@@ -601,7 +619,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
 	[CHIP_NAVI10] = {&navi10_device_info, NULL},
 	[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
 	[CHIP_NAVI14] = {&navi14_device_info, NULL},
-	[CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_device_info},
+	[CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_vf_device_info},
 	[CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, &navy_flounder_device_info},
 	[CHIP_VANGOGH] = {&vangogh_device_info, NULL},
 	[CHIP_DIMGREY_CAVEFISH] = {&dimgrey_cavefish_device_info, &dimgrey_cavefish_device_info},
@@ -674,7 +692,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
 
 static void kfd_cwsr_init(struct kfd_dev *kfd)
 {
-   if (cwsr_enable && kfd->device_info->supports_cwsr) {
+	if ((cwsr_enable && kfd->device_info->supports_cwsr) || cwsr_enable == 2) {
if (kfd->device_info->asic_family < CHIP_VEGA10) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RFC PATCH 0/3] A drm_plane API to support HDR planes

2021-05-19 Thread Pekka Paalanen
On Tue, 18 May 2021 10:19:25 -0400
Harry Wentland  wrote:

> On 2021-05-18 3:56 a.m., Pekka Paalanen wrote:
> > On Mon, 17 May 2021 15:39:03 -0400
> > Vitaly Prosyak  wrote:
> >   
> >> On 2021-05-17 12:48 p.m., Sebastian Wick wrote:  

...

> >>> I suspect that this is not about tone mapping at all. The use cases
> >>> listed always have the display in PQ mode and just assume that no
> >>> content exceeds the PQ limitations. Then you can simply bring all
> >>> content to the color space with a matrix multiplication and then map the
> >>> linear light content somewhere into the PQ range. Tone mapping is
> >>> performed in the display only.  
> > 
> > The use cases do use the word "desktop" though. Harry, could you expand
> > on this, are you seeking a design that is good for generic desktop
> > compositors too, or one that is more tailored to "embedded" video
> > player systems taking the most advantage of (potentially
> > fixed-function) hardware?
> >   
> 
> The goal is to enable this on a generic desktop, such as generic Wayland
> implementations or ChromeOS. We're not looking for a custom solution for
> some embedded systems, though the solution we end up with should obviously
> not prevent an implementation on embedded video players.

(There is a TL;DR: at the end.)

Echoing a little bit what Sebastian already said, I believe there are
two sides to this again:
- color management in the traditional sense
- modern standardised display technology

It was perhaps too harsh to say that generic Wayland compositors cannot
use enum-based color-related UAPI. Sometimes they could, sometimes it
won't be good enough.

Traditional color management assumes that no two monitors are the same,
even if they are the same make, model, and manufacturing batch, and are
driven exactly the same way. Hence, all monitors may require
calibration (adjusting monitor knobs), and/or they may require
profiling (measuring the light emission with a special hardware device
designed for that). Also the viewing environment has an effect.

For profiling to be at all meaningful, calibration must be fixed. This
means that there must be no dynamic on-the-fly adaptation done in the
monitor, in the display hardware, or in the kernel. That is a tall
order that I guess is going to be less and less achievable, especially
with HDR monitors.

The other side is where the end user trusts the standards, and trusts
that the drivers and the hardware do what they are specified to do.
This is where you can trust that the monitor does the tone-mapping magic
right.

Weston needs to support both approaches, because we want to prove our
new approach to traditional color management, but we also want to
support HDR, and if possible, do both at the same time. Doing both at
the same time is what we think foremost, because it's also the hardest
thing to achieve. If that can be done, then everything else works out
too.

However, this should not exclude the possibility to trust standards and
monitor magic, when the end user wants it.

It's also possible that a monitor simply doesn't support a mode that
would enable fully color managed HDR, so Weston will need to be able to
drive monitors with e.g. BT.2020/PQ data eventually. It's just not the
first goal we have.

This debate is a little bit ironic. The Wayland approach to traditional
color management is that end users should trust the display server to
do the right thing, where before people only trusted the individual
apps using a specific CMS implementation. The display server was the
untrusted one that should just get out of the way and not touch
anything. Now I'm arguing that I don't want to trust monitor magic, who
knows what atrocities it does to my picture! But take the next logical
step, and one would be arguing that end users should trust also
monitors to do the right thing. :-)

The above has two catches:

- Do you actually trust hardware manufacturers and marketers and EDID?
  Monitors have secret sauce you can't inspect nor change.

- You feed a single video stream to a monitor, in a single format,
  encoding and color space. The display server OTOH gets an arbitrary
  number of input video streams in arbitrary formats, encodings, and
  color spaces, and it needs to composite them into one.

Composition is hard. It's not enough to know what kind of signals you
take in and what kind of signal you must output. You also need to know
what the end user wants from the result: the render intent.

Even if we trust the monitor magic to do the right thing in
interpreting and displaying our output signal, we still need to know
what the end user wants from the composition, and we need to control
the composition formula to achieve that.

TL;DR:

I would summarise my comments so far into these:

- Telling the kernel the color spaces and letting it come up with
  whatever color transformation formula from those is not enough,
  because it puts the render intent policy decision in the kernel.

- Telling the kernel what 

Re: AMDGPU error: "[drm:amdgpu_dm_atomic_commit_tail [amdgpu]] *ERROR* Waiting for fences timed out!"

2021-05-19 Thread Michel Dänzer
On 2021-05-19 12:05 a.m., Alex Deucher wrote:
> On Tue, May 18, 2021 at 10:11 AM Michel Dänzer  wrote:
>>
>> On 2021-05-17 11:33 a.m., xgqt wrote:
>>> Hello!
>>>
>>> I run an AMD laptop "81NC Lenovo IdeaPad S340-15API" - AMD Ryzen 5 3500U 
>>> with Radeon Vega 8 Graphics.
>>> Recently some breakages started happening for me. About 1h after boot-up, 
>>> while using the KDE desktop, the machine's GUI would freeze. Sometimes it 
>>> would be possible to move the mouse but the rest would be frozen. The 
>>> screen may start blinking or go black.
>>>
>>> I'm not sure if this is my kernel, firmware or the hardware.
>>> I don't understand dmesg, that's why I'm guessing, but I think it is the 
>>> firmware since this behavior started around 2021-05-15.
>>> From my Portage logs I see that I updated my firmware on 2021-05-14 at 
>>> 18:16:06.
>>> So breakages started with my kernel: 5.10.27 and FW: 20210511.
>>> After the breakage I jumped to an older kernel, 5.4.97, and compiled 
>>> 5.12.4. I didn't notice a breakage on 5.4.97 but the system only ran ~40 minutes.
>>> So I booted to newly compiled 5.12.4 where I was ~1h and it broke.
>>> After that I booted to 5.4.97 again and downgraded my FW.
>>> While I'm writing this I'm booted to kernel: 5.12.4 with FW: 20210315.
>>>
>>> I also described my situation on the Gentoo bugzilla: 
>>> https://bugs.gentoo.org/790566
>>>
>>> "dmesg.log" attached here is from the time machine run fine (at the 
>>> moment); "errors_sat_may_15_072825_pm_cest_2021.log" is a dmesg log from 
>>> the time system broke
>>>
>>> Can I get any help with this? What are the next steps I should take? Any 
>>> other files I should provide?
>>
>> I've hit similar hangs with a Lenovo ThinkPad E595 (Ryzen 7 3700U / Picasso 
>> / RAVEN 0x1002:0x15D8 0x17AA:0x5124 0xC1). I'm also suspecting them to be 
>> firware related. The hangs occurred with firmware from the AMD 20.50 
>> release. I'm currently running with firmware from the 20.40 release, no hang 
>> in almost 2 weeks (the hangs happened within 1-2 days after boot).
> 
> Can you narrow down which firmware(s) cause the problem?

I'll try, but note I'm not really sure yet my hangs were related to firmware 
(only). Anyway, I'll try narrowing it down.


-- 
Earthling Michel Dänzer   |   https://redhat.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/3] gpu: drm: replace occurrences of invalid character

2021-05-19 Thread Mauro Carvalho Chehab
There are some places in drm that ended up receiving a
REPLACEMENT CHARACTER U+fffd ('�'), probably because of
some bad charset conversion.

Fix them by using what seems to be the proper character.

Signed-off-by: Mauro Carvalho Chehab 
---
 drivers/gpu/drm/amd/include/atombios.h   | 10 +-
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.h|  2 +-
 drivers/gpu/drm/r128/r128_drv.h  |  2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/atombios.h 
b/drivers/gpu/drm/amd/include/atombios.h
index 47eb84598b96..6a505d1b82a5 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -5178,11 +5178,11 @@ typedef struct  _ATOM_LEAKAGE_VOLTAGE_OBJECT_V3
 typedef struct  _ATOM_SVID2_VOLTAGE_OBJECT_V3
 {
ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader;// voltage mode = 
VOLTAGE_OBJ_SVID2
-// 14:7 � PSI0_VID
-// 6 � PSI0_EN
-// 5 � PSI1
-// 4:2 � load line slope trim.
-// 1:0 � offset trim,
+// 14:7 - PSI0_VID
+// 6 - PSI0_EN
+// 5 - PSI1
+// 4:2 - load line slope trim.
+// 1:0 - offset trim,
USHORT   usLoadLine_PSI;
 // GPU GPIO pin Id to SVID2 regulator VRHot pin. possible value 0~31. 0 means 
GPIO0, 31 means GPIO31
UCHARucSVDGpioId; //0~31 indicate GPIO0~31
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 14e2ffb6c0e5..2694dbb9967e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: MIT*/
 /*
- * Copyright � 2003-2018 Intel Corporation
+ * Copyright © 2003-2018 Intel Corporation
  */
 
 #ifndef _INTEL_GPU_COMMANDS_H_
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h 
b/drivers/gpu/drm/i915/i915_gpu_error.h
index 16bc42de4b84..4df24c737e13 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -1,7 +1,7 @@
 /*
  * SPDX-License-Identifier: MIT
  *
- * Copyright � 2008-2018 Intel Corporation
+ * Copyright © 2008-2018 Intel Corporation
  */
 
 #ifndef _I915_GPU_ERROR_H_
diff --git a/drivers/gpu/drm/r128/r128_drv.h b/drivers/gpu/drm/r128/r128_drv.h
index 8b256123cf2b..c4d0e21280b9 100644
--- a/drivers/gpu/drm/r128/r128_drv.h
+++ b/drivers/gpu/drm/r128/r128_drv.h
@@ -29,7 +29,7 @@
  *Rickard E. (Rik) Faith 
  *Kevin E. Martin 
  *Gareth Hughes 
- *Michel D�zer 
+ *Michel Dänzer 
  */
 
 #ifndef __R128_DRV_H__
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RFC PATCH 1/3] drm/color: Add RGB Color encodings

2021-05-19 Thread Pekka Paalanen
On Tue, 18 May 2021 10:32:48 -0400
Harry Wentland  wrote:

> On 2021-05-17 4:34 a.m., Pekka Paalanen wrote:
> > On Fri, 14 May 2021 17:04:51 -0400
> > Harry Wentland  wrote:
> >   
> >> On 2021-04-30 8:53 p.m., Sebastian Wick wrote:  
> >>> On 2021-04-26 20:56, Harry Wentland wrote:
> > 
> > ...
> >   
>  Another reason I'm proposing to define the color space (and gamma) of
>  a plane is to make this explicit. Up until the color space and gamma
>  of a plane or framebuffer are not well defined, which leads to drivers
>  assuming the color space and gamma of a buffer (for blending and other
>  purposes) and might lead to sub-optimal outcomes.
> >>>
> >>> Blending only is "correct" with linear light so that property of the
> >>> color space is important. However, why does the kernel have to be
> >>> involved here? As long as user space knows that for correct blending the
> >>> data must represent linear light and knows when in the pipeline blending
> >>> happens it can make sure that the data at that point in the pipeline
> >>> contains linear light.
> >>> 
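
To make the point about blending concrete: alpha-blending sRGB-encoded
values directly gives a visibly different result from converting to
linear light, blending, and re-encoding. A minimal userspace sketch;
the transfer-function constants are from the sRGB specification,
everything else is illustrative:

#include <math.h>
#include <stdio.h>

/* sRGB EOTF: encoded [0,1] -> linear light. */
static double srgb_to_linear(double v)
{
	return v <= 0.04045 ? v / 12.92 : pow((v + 0.055) / 1.055, 2.4);
}

/* sRGB inverse EOTF: linear light -> encoded. */
static double linear_to_srgb(double v)
{
	return v <= 0.0031308 ? v * 12.92 : 1.055 * pow(v, 1.0 / 2.4) - 0.055;
}

int main(void)
{
	double a = 0.25, fg = 1.0, bg = 0.0; /* white over black, 25% alpha */

	/* Blending the encoded values directly: */
	double encoded = a * fg + (1.0 - a) * bg;

	/* Blending in linear light, then re-encoding: */
	double linear = linear_to_srgb(a * srgb_to_linear(fg) +
				       (1.0 - a) * srgb_to_linear(bg));

	/* Prints roughly 0.250 vs 0.537 -- a clearly visible difference. */
	printf("encoded-domain blend: %.3f, linear-light blend: %.3f\n",
	       encoded, linear);
	return 0;
}
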
> >>
> >> The only reason the kernel needs to be involved is to take full advantage
> >> of the available HW without requiring KMS clients to be aware of
> >> the difference in display HW.  
> > 
> > Can you explain in more tangible examples, why you think so, please?
> > 
> > Is it because hardware does not fit the KMS UAPI model of the abstract
> > pixel pipeline?
> >   
> 
> I'd wager no HW is designed to meet KMS UAPI, rather KMS UAPI is designed
> to abstract HW.

Of course, but you are in big trouble in any case if there is a
fundamental mismatch. You may have to declare that all existing KMS
properties for this stuff will be mutually exclusive with your new
properties, so that you can introduce a new generic abstract pipeline
in KMS.

By mutually exclusive I mean that a driver must advertise only one or
the other set of properties and never both. If you want to support
userspace that doesn't understand the alternative set, maybe you also
need a drm client cap to switch to the alternative set per-drm-client.
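
For illustration, libdrm's existing drmSetClientCap() is the mechanism
such a per-client switch would go through; the capability name below is
purely hypothetical, invented here only to sketch the opt-in:

#include <xf86drm.h>

/* DRM_CLIENT_CAP_COLOR_PIPELINE is a HYPOTHETICAL capability, used only
 * to illustrate the mechanism; no such cap exists as of this thread.
 */
#define DRM_CLIENT_CAP_COLOR_PIPELINE 99

static int enable_new_color_props(int drm_fd)
{
	/* On success the driver would stop advertising the legacy
	 * GAMMA/CTM/DEGAMMA properties to this client and expose the
	 * alternative property set instead.
	 */
	return drmSetClientCap(drm_fd, DRM_CLIENT_CAP_COLOR_PIPELINE, 1);
}
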

> > Or is it because you have fixed-function hardware elements that you can
> > only make use of when userspace uses an enum-based UAPI?
> >   
> 
> One example is our degamma on our latest generation HW, where we have
> fixed-function "degamma" (or rather, de-EOTF):
> 
> https://gitlab.freedesktop.org/agd5f/linux/-/blob/amd-staging-drm-next/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c#L166

Ok.

> > I would totally agree that the driver does not want to be analysing LUT
> > entries to decipher if it could use a fixed-function element or not. It
> > would introduce uncertainty in the UAPI. So fixed-function elements
> > would need their own properties, but I don't know if that is feasible
> > as generic UAPI or if it should be driver-specific (and so left unused
> > by generic userspace).
> >   
> 
> 
> For the CRTC LUTs we actually do a linearity check to program the
> HW into bypass when the LUT is linear since the KMS LUT definition
> doesn't map well onto the LUT definition used by our HW and leads
> to rounding errors and failing IGT kms_color tests (if I remember
> this correctly).
> 
> https://gitlab.freedesktop.org/agd5f/linux/-/blob/amd-staging-drm-next/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c#L330
> 
> Hence the suggestion to define pre-defined TFs right at a KMS level
> for usecases where we can assume the display will tonemap the 
> content.
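
A linearity check of that kind can be sketched as follows; the 16-bit
LUT layout mirrors struct drm_color_lut, but the helper and tolerance
are illustrative rather than the actual amdgpu implementation:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

struct color_lut {	/* mirrors struct drm_color_lut (16-bit channels) */
	uint16_t red, green, blue;
};

/* Return true if the LUT is close enough to the identity ramp that the
 * hardware block can be put into bypass instead of being programmed.
 */
static bool lut_is_linear(const struct color_lut *lut, int size, int tol)
{
	for (int i = 0; i < size; i++) {
		/* Expected identity value for entry i on a 16-bit ramp. */
		uint32_t ideal = (uint32_t)i * 0xFFFF / (size - 1);

		if (abs((int)lut[i].red   - (int)ideal) > tol ||
		    abs((int)lut[i].green - (int)ideal) > tol ||
		    abs((int)lut[i].blue  - (int)ideal) > tol)
			return false;
	}
	return true;
}
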

Right. Explaining this would have been a good introduction in your
cover letter.

Maybe you want to define new KMS properties that shall be mutually
exclusive with the existing KMS GAMMA/CTM/DEGAMMA properties and
clearly document them as such.


Thanks,
pq




[PATCH 3/3] drm/amdgpu/pm: display vcn pp dpm

2021-05-19 Thread David M Nieto
Enable displaying DPM levels for VCN clocks
in swsmu supported ASICs

Signed-off-by: David M Nieto 
---
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 46 +++
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  2 +
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   |  8 
 .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   | 34 ++
 .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c| 46 +++
 5 files changed, 136 insertions(+)
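
With this applied, the VCN levels surface through the same pp_dpm_*
sysfs files as the other clock domains (pp_dpm_vclk/pp_dpm_dclk); a
minimal userspace reader sketch, where the card0 path is an assumption:

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/class/drm/card0/device/pp_dpm_vclk";
	char line[128];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	/* Each line is "<level>: <freq>Mhz", with a '*' marking the
	 * currently active level, matching the sprintf format used in
	 * the patch below. */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}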

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 77693bf0840c..1735a96dd307 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -822,6 +822,52 @@ static int arcturus_print_clk_levels(struct smu_context 
*smu,
now) ? "*" : ""));
break;
 
+   case SMU_VCLK:
+   ret = arcturus_get_current_clk_freq_by_table(smu, SMU_VCLK, 
);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get current vclk 
Failed!");
+   return ret;
+   }
+
+   single_dpm_table = &(dpm_context->dpm_tables.vclk_table);
+   ret = arcturus_get_clk_table(smu, , single_dpm_table);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get vclk levels 
Failed!");
+   return ret;
+   }
+
+   for (i = 0; i < single_dpm_table->count; i++)
+   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   i, single_dpm_table->dpm_levels[i].value,
+   (clocks.num_levels == 1) ? "*" :
+   (arcturus_freqs_in_same_level(
+   clocks.data[i].clocks_in_khz / 1000,
+   now) ? "*" : ""));
+   break;
+
+   case SMU_DCLK:
+   ret = arcturus_get_current_clk_freq_by_table(smu, SMU_DCLK, 
);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get current dclk 
Failed!");
+   return ret;
+   }
+
+   single_dpm_table = &(dpm_context->dpm_tables.dclk_table);
+   ret = arcturus_get_clk_table(smu, , single_dpm_table);
+   if (ret) {
+   dev_err(smu->adev->dev, "Attempt to get dclk levels 
Failed!");
+   return ret;
+   }
+
+   for (i = 0; i < single_dpm_table->count; i++)
+   size += sprintf(buf + size, "%d: %uMhz %s\n",
+   i, single_dpm_table->dpm_levels[i].value,
+   (clocks.num_levels == 1) ? "*" :
+   (arcturus_freqs_in_same_level(
+   clocks.data[i].clocks_in_khz / 1000,
+   now) ? "*" : ""));
+   break;
+
case SMU_PCIE:
gen_speed = smu_v11_0_get_current_pcie_link_speed_level(smu);
lane_width = smu_v11_0_get_current_pcie_link_width_level(smu);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index b8971303a873..7763de464678 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1273,6 +1273,8 @@ static int navi10_print_clk_levels(struct smu_context 
*smu,
case SMU_MCLK:
case SMU_UCLK:
case SMU_FCLK:
+   case SMU_VCLK:
+   case SMU_DCLK:
case SMU_DCEFCLK:
ret = navi10_get_current_clk_freq_by_table(smu, clk_type, 
_value);
if (ret)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 0c40a54c46d7..6da6d08d8858 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -987,6 +987,10 @@ static int sienna_cichlid_print_clk_levels(struct 
smu_context *smu,
case SMU_MCLK:
case SMU_UCLK:
case SMU_FCLK:
+   case SMU_VCLK:
+   case SMU_VCLK1:
+   case SMU_DCLK:
+   case SMU_DCLK1:
case SMU_DCEFCLK:
ret = sienna_cichlid_get_current_clk_freq_by_table(smu, 
clk_type, _value);
if (ret)
@@ -1150,6 +1154,10 @@ static int sienna_cichlid_force_clk_levels(struct 
smu_context *smu,
case SMU_MCLK:
case SMU_UCLK:
case SMU_FCLK:
+   case SMU_VCLK:
+   case SMU_VCLK1:
+   case SMU_DCLK:
+   case SMU_DCLK1:
/* There is only 2 levels for fine grained DPM */
if (sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) {
soft_max_level = (soft_max_level >= 1 ? 1 : 0);
diff --git 

[PATCH 2/3] drm/amdgpu/pm: add new fields for Navi1x

2021-05-19 Thread David M Nieto
Fill voltage fields in metrics table

Signed-off-by: David M Nieto 
---
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 58 +--
 1 file changed, 41 insertions(+), 17 deletions(-)
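
The conversion used below works out to a 1550.00 mV base stepping down
6.25 mV per VID offset count, kept integral by scaling everything by
100. Restated standalone; the helper name and the signedness of the
offset are assumptions for the sketch:

#include <stdint.h>
#include <stdio.h>

/* Convert an SMU VID offset to millivolts, as in the patch below:
 * 1550.00 mV base, minus 6.25 mV per offset count, scaled by 100 so
 * the arithmetic stays in integers.
 */
static uint16_t vid_offset_to_mv(int16_t offset)
{
	return (155000 - 625 * offset) / 100;
}

int main(void)
{
	/* offset 0 -> 1550 mV; offset 80 -> 1550 - 500 = 1050 mV */
	printf("%u mV\n", vid_offset_to_mv(0));
	printf("%u mV\n", vid_offset_to_mv(80));
	return 0;
}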

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index ac13042672ea..b8971303a873 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -505,7 +505,7 @@ static int navi10_tables_init(struct smu_context *smu)
goto err0_out;
smu_table->metrics_time = 0;
 
-   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_1);
+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_3);
smu_table->gpu_metrics_table = 
kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
if (!smu_table->gpu_metrics_table)
goto err1_out;
@@ -2627,8 +2627,8 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,
 void **table)
 {
struct smu_table_context *smu_table = >smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetrics_legacy_t metrics;
int ret = 0;
 
@@ -2646,7 +2646,7 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,
 
mutex_unlock(>metrics_lock);
 
-   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);
+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
 
gpu_metrics->temperature_edge = metrics.TemperatureEdge;
gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -2681,17 +2681,23 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,
 
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
+   gpu_metrics->voltage_gfx = (155000 - 625 * 
metrics.CurrGfxVoltageOffset) / 100;
+   gpu_metrics->voltage_mem = (155000 - 625 * metrics.CurrMemVidOffset) / 
100;
+   gpu_metrics->voltage_soc = (155000 - 625 * 
metrics.CurrSocVoltageOffset) / 100;
+
*table = (void *)gpu_metrics;
 
-   return sizeof(struct gpu_metrics_v1_1);
+   return sizeof(struct gpu_metrics_v1_3);
+out:
+   return ret;
 }
 
 static ssize_t navi10_get_gpu_metrics(struct smu_context *smu,
  void **table)
 {
struct smu_table_context *smu_table = >smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetrics_t metrics;
int ret = 0;
 
@@ -2709,7 +2715,7 @@ static ssize_t navi10_get_gpu_metrics(struct smu_context 
*smu,
 
mutex_unlock(>metrics_lock);
 
-   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);
+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
 
gpu_metrics->temperature_edge = metrics.TemperatureEdge;
gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -2746,17 +2752,23 @@ static ssize_t navi10_get_gpu_metrics(struct 
smu_context *smu,
 
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
+   gpu_metrics->voltage_gfx = (155000 - 625 * 
metrics.CurrGfxVoltageOffset) / 100;
+   gpu_metrics->voltage_mem = (155000 - 625 * metrics.CurrMemVidOffset) / 
100;
+   gpu_metrics->voltage_soc = (155000 - 625 * 
metrics.CurrSocVoltageOffset) / 100;
+
*table = (void *)gpu_metrics;
 
-   return sizeof(struct gpu_metrics_v1_1);
+   return sizeof(struct gpu_metrics_v1_3);
+out:
+   return ret;
 }
 
 static ssize_t navi12_get_legacy_gpu_metrics(struct smu_context *smu,
 void **table)
 {
struct smu_table_context *smu_table = >smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetrics_NV12_legacy_t metrics;
int ret = 0;
 
@@ -2774,7 +2786,7 @@ static ssize_t navi12_get_legacy_gpu_metrics(struct 
smu_context *smu,
 
mutex_unlock(>metrics_lock);
 
-   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);
+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
 
gpu_metrics->temperature_edge = metrics.TemperatureEdge;
gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -2814,17 +2826,23 @@ static ssize_t navi12_get_legacy_gpu_metrics(struct 
smu_context *smu,
 
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
+   gpu_metrics->voltage_gfx = (155000 - 625 * 

[PATCH 1/3] drm/amdgpu/pm: Update metrics table

2021-05-19 Thread David M Nieto
expand metrics table with voltages and frequency ranges

Signed-off-by: David M Nieto 
---
 .../gpu/drm/amd/include/kgd_pp_interface.h| 69 +++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c|  3 +
 2 files changed, 72 insertions(+)
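
Userspace picks the right structure by dispatching on the common header
at the start of the gpu_metrics blob. A minimal reader sketch: the
header layout follows kgd_pp_interface.h, while the sysfs path and
error handling are assumptions for illustration:

#include <stdint.h>
#include <stdio.h>

/* Matches struct metrics_table_header in kgd_pp_interface.h. */
struct metrics_table_header {
	uint16_t structure_size;
	uint8_t  format_revision;
	uint8_t  content_revision;
};

int main(void)
{
	/* The card0 path is an assumption for illustration. */
	const char *path = "/sys/class/drm/card0/device/gpu_metrics";
	struct metrics_table_header hdr;
	FILE *f = fopen(path, "rb");

	if (!f || fread(&hdr, sizeof(hdr), 1, f) != 1) {
		perror(path);
		return 1;
	}
	fclose(f);

	/* A v1.3 table carries the new voltage fields added here. */
	printf("gpu_metrics v%u.%u, %u bytes\n",
	       hdr.format_revision, hdr.content_revision,
	       hdr.structure_size);
	return 0;
}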

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index e2d13131a432..b1cd52a9d684 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -536,6 +536,75 @@ struct gpu_metrics_v1_2 {
uint64_tfirmware_timestamp;
 };
 
+struct gpu_metrics_v1_3 {
+   struct metrics_table_header common_header;
+
+   /* Temperature */
+   uint16_ttemperature_edge;
+   uint16_ttemperature_hotspot;
+   uint16_ttemperature_mem;
+   uint16_ttemperature_vrgfx;
+   uint16_ttemperature_vrsoc;
+   uint16_ttemperature_vrmem;
+
+   /* Utilization */
+   uint16_taverage_gfx_activity;
+   uint16_taverage_umc_activity; // memory 
controller
+   uint16_taverage_mm_activity; // UVD or VCN
+
+   /* Power/Energy */
+   uint16_taverage_socket_power;
+   uint64_tenergy_accumulator;
+
+   /* Driver attached timestamp (in ns) */
+   uint64_tsystem_clock_counter;
+
+   /* Average clocks */
+   uint16_taverage_gfxclk_frequency;
+   uint16_taverage_socclk_frequency;
+   uint16_taverage_uclk_frequency;
+   uint16_taverage_vclk0_frequency;
+   uint16_taverage_dclk0_frequency;
+   uint16_taverage_vclk1_frequency;
+   uint16_taverage_dclk1_frequency;
+
+   /* Current clocks */
+   uint16_tcurrent_gfxclk;
+   uint16_tcurrent_socclk;
+   uint16_tcurrent_uclk;
+   uint16_tcurrent_vclk0;
+   uint16_tcurrent_dclk0;
+   uint16_tcurrent_vclk1;
+   uint16_tcurrent_dclk1;
+
+   /* Throttle status */
+   uint32_tthrottle_status;
+
+   /* Fans */
+   uint16_tcurrent_fan_speed;
+
+   /* Link width/speed */
+   uint16_tpcie_link_width;
+   uint16_tpcie_link_speed; // in 0.1 GT/s
+
+   uint16_tpadding;
+
+   uint32_tgfx_activity_acc;
+   uint32_tmem_activity_acc;
+
+   uint16_ttemperature_hbm[NUM_HBM_INSTANCES];
+
+   /* PMFW attached timestamp (10ns resolution) */
+   uint64_tfirmware_timestamp;
+
+   /* Voltage (mV) */
+   uint16_tvoltage_soc;
+   uint16_tvoltage_gfx;
+   uint16_tvoltage_mem;
+
+   uint16_tpadding1;
+};
+
 /*
  * gpu_metrics_v2_0 is not recommended as it's not naturally aligned.
  * Use gpu_metrics_v2_1 or later instead.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 0934e5b3aa17..0ceb7329838c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -764,6 +764,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t 
frev, uint8_t crev)
case METRICS_VERSION(1, 2):
structure_size = sizeof(struct gpu_metrics_v1_2);
break;
+   case METRICS_VERSION(1, 3):
+   structure_size = sizeof(struct gpu_metrics_v1_3);
+   break;
case METRICS_VERSION(2, 0):
structure_size = sizeof(struct gpu_metrics_v2_0);
break;
-- 
2.17.1
