[PATCH 4/4] drm/amdgpu: indirect register access for nv12 sriov

2021-03-30 Thread Peng Ju Zhou
1. expand rlcg interface for gc & mmhub indirect access
2. add rlcg interface for no kiq

Signed-off-by: Peng Ju Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h|   3 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 131 ++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |   2 +-
 drivers/gpu/drm/amd/amdgpu/soc15_common.h  |  75 ++--
 5 files changed, 150 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 060d0ae99453..438e2f732377 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -490,7 +490,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
adev->gfx.rlc.funcs &&
adev->gfx.rlc.funcs->is_rlcg_access_range) {
if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
-   return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
+   return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0);
} else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index aeaaae713c59..4fc2ce8ce8ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -127,7 +127,8 @@ struct amdgpu_rlc_funcs {
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
-   void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v);
+   void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32 
flag);
+   u32 (*rlcg_rreg)(struct amdgpu_device *adev, u32 offset, u32 flag);
bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index b4fd0394cd08..85a6a10e048f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -177,6 +177,11 @@
 #define mmGC_THROTTLE_CTRL_Sienna_Cichlid  0x2030
 #define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX 0
 
+#define GFX_RLCG_GC_WRITE_OLD  (0x8 << 28)
+#define GFX_RLCG_GC_WRITE  (0x0 << 28)
+#define GFX_RLCG_GC_READ   (0x1 << 28)
+#define GFX_RLCG_MMHUB_WRITE   (0x2 << 28)
+
 MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
 MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/navi10_me.bin");
@@ -1422,38 +1427,127 @@ static const struct soc15_reg_golden 
golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x, 0x0080)
 };
 
-static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
+static bool gfx_v10_is_rlcg_rw(struct amdgpu_device *adev, u32 offset, 
uint32_t *flag, bool write)
+{
+   /* always programed by rlcg, only for gc */
+   if (offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI) ||
+   offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO) ||
+   offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH) ||
+   offset == SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL) ||
+   offset == SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX) ||
+   offset == SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)) {
+   if (!amdgpu_sriov_reg_indirect_gc(adev))
+   *flag = GFX_RLCG_GC_WRITE_OLD;
+   else
+   *flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ;
+
+   return true;
+   }
+
+   /* currently support gc read/write, mmhub write */
+   if (offset >= SOC15_REG_OFFSET(GC, 0, mmSDMA0_DEC_START) &&
+   offset <= SOC15_REG_OFFSET(GC, 0, mmRLC_GTS_OFFSET_MSB)) {
+   if (amdgpu_sriov_reg_indirect_gc(adev))
+   *flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ;
+   else
+   return false;
+   } else {
+   if (amdgpu_sriov_reg_indirect_mmhub(adev))
+   *flag = GFX_RLCG_MMHUB_WRITE;
+   else
+   return false;
+   }
+
+   return true;
+}
+
+static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, 
uint32_t flag)
 {
static void *scratch_reg0;
static void *scratch_reg1;
+   static void *scratch_reg2;
+   static void *scratch_reg3;
static void *spare_int;
+   static uint32_t grbm_cntl;
+   static uint32_t grbm_idx;
uint32_t i = 0;
uint32_t retries = 5;
+   u32 ret = 0;
+
+   scratch_reg0 = adev->rmmio +
+  (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + 
mmSCRATCH_REG0) * 4;
+   scratch_reg1 = adev->rmmio +
+  

[PATCH 3/4] drm/amdgpu: indirect register access for nv12 sriov

2021-03-30 Thread Peng Ju Zhou
using the control bits got from host to control registers access.

Signed-off-by: Peng Ju Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 17 +
 2 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index b62f134d6dd5..0c9c5255aa42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -466,6 +466,8 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device 
*adev)
((struct amd_sriov_msg_pf2vf_info 
*)pf2vf_info)->vf2pf_update_interval_ms;
adev->virt.gim_feature =
((struct amd_sriov_msg_pf2vf_info 
*)pf2vf_info)->feature_flags.all;
+   adev->virt.reg_access =
+   ((struct amd_sriov_msg_pf2vf_info 
*)pf2vf_info)->reg_access_flags.all;
 
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 3580a746ad9a..383d4bdc3fb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -228,6 +228,7 @@ struct amdgpu_virt {
bool tdr_debug;
struct amdgpu_virt_ras_err_handler_data *virt_eh_data;
bool ras_init_done;
+   uint32_t reg_access;
 
/* vf2pf message */
struct delayed_work vf2pf_work;
@@ -249,6 +250,22 @@ struct amdgpu_virt {
 #define amdgpu_sriov_fullaccess(adev) \
 (amdgpu_sriov_vf((adev)) && !amdgpu_sriov_runtime((adev)))
 
+#define amdgpu_sriov_reg_indirect_en(adev) \
+(amdgpu_sriov_vf((adev)) && \
+   ((adev)->virt.gim_feature & (AMDGIM_FEATURE_INDIRECT_REG_ACCESS)))
+
+#define amdgpu_sriov_reg_indirect_ih(adev) \
+(amdgpu_sriov_vf((adev)) && \
+   ((adev)->virt.reg_access & (AMDGIM_FEATURE_IH_REG_PSP_EN)))
+
+#define amdgpu_sriov_reg_indirect_mmhub(adev) \
+(amdgpu_sriov_vf((adev)) && \
+   ((adev)->virt.reg_access & (AMDGIM_FEATURE_MMHUB_REG_RLC_EN)))
+
+#define amdgpu_sriov_reg_indirect_gc(adev) \
+(amdgpu_sriov_vf((adev)) && \
+   ((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN)))
+
 #define amdgpu_passthrough(adev) \
 ((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/4] drm/amdgpu: indirect register access for nv12 sriov

2021-03-30 Thread Peng Ju Zhou
get pf2vf msg info at it's earliest time so that
guest driver can use these info to decide whether
register indirect access enabled.

Signed-off-by: Peng Ju Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 8 
 2 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a501d1a4d000..060d0ae99453 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2080,6 +2080,11 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
amdgpu_vf_error_put(adev, 
AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
return r;
}
+
+   /*get pf2vf msg info at it's earliest time*/
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_virt_init_data_exchange(adev);
+
}
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index d98eb41d..b62f134d6dd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -617,6 +617,14 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device 
*adev)
if (adev->virt.ras_init_done)
amdgpu_virt_add_bad_page(adev, 
bp_block_offset, bp_block_size);
}
+   } else if (adev->bios != NULL) {
+   adev->virt.fw_reserve.p_pf2vf =
+   (struct amd_sriov_msg_pf2vf_info_header *)
+   (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+
+   amdgpu_virt_read_pf2vf_data(adev);
+
+   return;
}
 
if (adev->virt.vf2pf_update_interval_ms != 0) {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/4] drm/amdgpu: indirect register access for nv12 sriov

2021-03-30 Thread Peng Ju Zhou
unify host driver and guest driver indirect access
control bits names

Signed-off-by: Peng Ju Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 097af4d3b6b1..3580a746ad9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -110,11 +110,11 @@ enum AMDGIM_FEATURE_FLAG {
 
 enum AMDGIM_REG_ACCESS_FLAG {
/* Use PSP to program IH_RB_CNTL */
-   AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
+   AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
/* Use RLC to program MMHUB regs */
-   AMDGIM_FEATURE_RLC_MMHUB_EN  = (1 << 1),
+   AMDGIM_FEATURE_MMHUB_REG_RLC_EN  = (1 << 1),
/* Use RLC to program GC regs */
-   AMDGIM_FEATURE_RLC_GC_EN = (1 << 2),
+   AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
 };
 
 struct amdgim_pf2vf_info_v1 {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu/display: guard ttu_regs with CONFIG_DRM_AMD_DC_DCN

2021-03-30 Thread Alex Deucher
Missing check for CONFIG_DRM_AMD_DC_DCN.

Fixes: 752106f5c5cd ("drm/amd/display: Set max TTU on DPG enable")
Signed-off-by: Alex Deucher 
Cc: Wesley Chalmers 
Cc: Stephen Rothwell 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index d9ab134a178f..a270879cbaba 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2599,6 +2599,7 @@ static void commit_planes_for_stream(struct dc *dc,
}
}
 
+#ifdef CONFIG_DRM_AMD_DC_DCN
if (stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE) {
struct pipe_ctx *mpcc_pipe;
struct pipe_ctx *odm_pipe;
@@ -2607,7 +2608,7 @@ static void commit_planes_for_stream(struct dc *dc,
for (odm_pipe = mpcc_pipe; odm_pipe; odm_pipe = 
odm_pipe->next_odm_pipe)
odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU;
}
-
+#endif
 
if ((update_type != UPDATE_TYPE_FAST) && 
stream->update_flags.bits.dsc_changed)
if 
(top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] Revert "amd/amdgpu: Disable VCN DPG mode for Picasso"

2021-03-30 Thread Quan, Evan
[AMD Public Use]

Acked-by: Evan Quan 

> -Original Message-
> From: amd-gfx  On Behalf Of Alex
> Deucher
> Sent: Thursday, March 11, 2021 10:20 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Gopalakrishnan,
> Veerabadhran (Veera) ; Liu, Leo
> 
> Subject: [PATCH] Revert "amd/amdgpu: Disable VCN DPG mode for Picasso"
> 
> This reverts commit c6d2b0fbb893d5c7dda405aa0e7bcbecf1c75f98.
> 
> This patch is a workaround for a hardware bug, but I don't know that we've
> actually seen the hw bug triggered in practice, meanwhile a number of
> people have reported that this causes suspend and resume issues.
> 
> Bug:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbugz
> illa.kernel.org%2Fshow_bug.cgi%3Fid%3D211277data=04%7C01%7Cev
> an.quan%40amd.com%7C106f649374b14dc3383108d8e498c485%7C3dd8961f
> e4884e608e11a82d994e183d%7C0%7C0%7C637510692088534999%7CUnknow
> n%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1ha
> WwiLCJXVCI6Mn0%3D%7C1000sdata=yLxYzonL8VrQui3MrFzsgA2FIM
> mRYt3e40jtvNCtRRM%3Dreserved=0
> Cc: Veerabadhran Gopalakrishnan 
> Cc: Leo Liu 
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 3808402cd964..1a0c3a816863 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -1405,7 +1405,8 @@ static int soc15_common_early_init(void *handle)
> 
>   adev->pg_flags = AMD_PG_SUPPORT_SDMA |
>   AMD_PG_SUPPORT_MMHUB |
> - AMD_PG_SUPPORT_VCN;
> + AMD_PG_SUPPORT_VCN |
> + AMD_PG_SUPPORT_VCN_DPG;
>   } else {
>   adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
>   AMD_CG_SUPPORT_GFX_MGLS |
> --
> 2.29.2
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.
> freedesktop.org%2Fmailman%2Flistinfo%2Famd-
> gfxdata=04%7C01%7Cevan.quan%40amd.com%7C106f649374b14dc33
> 83108d8e498c485%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C63
> 7510692088534999%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMD
> AiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=
> hG2dxaaaNNQEqgpKP5opDH3reYhZYKY%2B7q1n4aEPoqA%3Dreserve
> d=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/2] ensure alignment on CPU page for bo mapping

2021-03-30 Thread Alex Deucher
Applied.  Thanks!

Alex

On Tue, Mar 30, 2021 at 12:21 PM Christian König
 wrote:
>
> Reviewed-by: Christian König  for the entire
> series.
>
> Alex will probably pick them up for the next feature pull request.
>
> Regards,
> Christian.
>
> Am 30.03.21 um 17:33 schrieb Xℹ Ruoyao:
> > In AMDGPU driver, the bo mapping should always align to CPU page or
> > the page table is corrupted.
> >
> > The first patch is cherry-picked from Loongson community, which sets a
> > suitable dev_info.gart_page_size so Mesa will handle the alignment
> > correctly.
> >
> > The second patch is added to ensure an ioctl with unaligned parameter to
> > be rejected -EINVAL, instead of causing page table corruption.
> >
> > The patches should be applied for drm-next.
> >
> > Huacai Chen (1):
> >drm/amdgpu: Set a suitable dev_info.gart_page_size
> >
> > Xℹ Ruoyao (1):
> >drm/amdgpu: check alignment on CPU page for bo map
> >
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 8 
> >   2 files changed, 6 insertions(+), 6 deletions(-)
> >
> >
> > base-commit: a0c8b193bfe81cc8e9c7c162bb8d777ba12596f0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Re: [PATCH] amd: display: dc: struct dc_state is declared twice

2021-03-30 Thread Alex Deucher
On Mon, Mar 29, 2021 at 9:36 PM 万家兵  wrote:
>
>
> >On Sat, Mar 27, 2021 at 3:28 AM Wan Jiabing  wrote:
> >>
> >> struct dc_state has been declared at 273rd line.
> >> Remove the duplicate.
> >> Delete duplicate blank lines.
> >
> >Can you split these into separate patches?
> >
> >Alex
>
> OK. But in fact, what I did  is simple.
> The most important thing is removing the duplicate
> struct dc_state declaration at 585th line.
> Others are all deleting duplicate blank lines.
>
> So maybe I should send two patchs, one is removing
> duplicate declaration, the other is deleting blank lines?
>

Yes.  There are so many whitespace changes in the commit that it's
hard to see the functional change.

Alex


> >>
> >> Signed-off-by: Wan Jiabing 
> >> ---
> >>  drivers/gpu/drm/amd/display/dc/dc.h | 10 --
> >>  1 file changed, 10 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
> >> b/drivers/gpu/drm/amd/display/dc/dc.h
> >> index 18ed0d3f247e..dc667298ab5b 100644
> >> --- a/drivers/gpu/drm/amd/display/dc/dc.h
> >> +++ b/drivers/gpu/drm/amd/display/dc/dc.h
> >> @@ -234,7 +234,6 @@ struct dc_static_screen_params {
> >> unsigned int num_frames;
> >>  };
> >>
> >> -
> >>  /* Surface update type is used by dc_update_surfaces_and_stream
> >>   * The update type is determined at the very beginning of the function 
> >> based
> >>   * on parameters passed in and decides how much programming (or updating) 
> >> is
> >> @@ -272,7 +271,6 @@ struct dc;
> >>  struct dc_plane_state;
> >>  struct dc_state;
> >>
> >> -
> >>  struct dc_cap_funcs {
> >> bool (*get_dcc_compression_cap)(const struct dc *dc,
> >> const struct dc_dcc_surface_param *input,
> >> @@ -281,7 +279,6 @@ struct dc_cap_funcs {
> >>
> >>  struct link_training_settings;
> >>
> >> -
> >>  /* Structure to hold configuration flags set by dm at dc creation. */
> >>  struct dc_config {
> >> bool gpu_vm_support;
> >> @@ -581,7 +578,6 @@ struct dc_bounding_box_overrides {
> >> int min_dcfclk_mhz;
> >>  };
> >>
> >> -struct dc_state;
>
> Removing the duplicate is here.
> And others are all deleting duplicate blank line.
>
> I think they are in the same file. I want to remove the declaration first.
> By the way, I deleted the blank line.
>
> Yours,
> Wan Jiabing
>
> >>  struct resource_pool;
> >>  struct dce_hwseq;
> >>  struct gpu_info_soc_bounding_box_v1_0;
> >> @@ -757,7 +753,6 @@ enum dc_transfer_func_predefined {
> >> TRANSFER_FUNCTION_GAMMA26
> >>  };
> >>
> >> -
> >>  struct dc_transfer_func {
> >> struct kref refcount;
> >> enum dc_transfer_func_type type;
> >> @@ -770,7 +765,6 @@ struct dc_transfer_func {
> >> };
> >>  };
> >>
> >> -
> >>  union dc_3dlut_state {
> >> struct {
> >> uint32_t initialized:1; /*if 3dlut is went through 
> >> color module for initialization */
> >> @@ -784,7 +778,6 @@ union dc_3dlut_state {
> >> uint32_t raw;
> >>  };
> >>
> >> -
> >>  struct dc_3dlut {
> >> struct kref refcount;
> >> struct tetrahedral_params lut_3d;
> >> @@ -1014,7 +1007,6 @@ enum dc_status dc_validate_global_state(
> >> struct dc_state *new_ctx,
> >> bool fast_validate);
> >>
> >> -
> >>  void dc_resource_state_construct(
> >> const struct dc *dc,
> >> struct dc_state *dst_ctx);
> >> @@ -1167,7 +1159,6 @@ struct dc_container_id {
> >> unsigned short productCode;
> >>  };
> >>
> >> -
> >>  struct dc_sink_dsc_caps {
> >> // 'true' if these are virtual DPCD's DSC caps (immediately 
> >> upstream of sink in MST topology),
> >> // 'false' if they are sink's DSC caps
> >> @@ -1229,7 +1220,6 @@ struct dc_cursor {
> >> struct dc_cursor_attributes attributes;
> >>  };
> >>
> >> -
> >>  
> >> /***
> >>   * Interrupt interfaces
> >>   
> >> **/
> >> --
> >> 2.25.1
> >>
> >> ___
> >> dri-devel mailing list
> >> dri-de...@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov

2021-03-30 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

Ping ..

>-Original Message-
>From: Emily Deng 
>Sent: Tuesday, March 30, 2021 5:43 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily 
>Subject: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov
>
>From: "Emily.Deng" 
>
>For vf assigned to guest VM, after FLR, the msix table will be reset.
>As the flr is done on host driver. The qemu and vfio driver don't know this,
>and the msix is still enable from qemu and vfio driver side.
>So if want to  re-setup the msix table, first need to disable and re-enable the
>msix from guest VM side or the qemu will do nothing as it thought the msix is
>already enabled.
>
>v2:
>Change name with amdgpu_irq prefix, remove #ifdef.
>
>Signed-off-by: Emily.Deng 
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 14 ++
> 1 file changed, 14 insertions(+)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>index 03412543427a..3045f52e613d 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>@@ -277,6 +277,17 @@ static bool amdgpu_msi_ok(struct amdgpu_device
>*adev)
> return true;
> }
>
>+static void amdgpu_irq_restore_msix(struct amdgpu_device *adev) {
>+u16 ctrl;
>+
>+pci_read_config_word(adev->pdev, adev->pdev->msix_cap +
>PCI_MSIX_FLAGS, );
>+ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
>+pci_write_config_word(adev->pdev, adev->pdev->msix_cap +
>PCI_MSIX_FLAGS, ctrl);
>+ctrl |= PCI_MSIX_FLAGS_ENABLE;
>+pci_write_config_word(adev->pdev, adev->pdev->msix_cap +
>+PCI_MSIX_FLAGS, ctrl); }
>+
> /**
>  * amdgpu_irq_init - initialize interrupt handling
>  *
>@@ -558,6 +569,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct
>amdgpu_device *adev)  {
> int i, j, k;
>
>+if (amdgpu_sriov_vf(adev))
>+amdgpu_irq_restore_msix(adev);
>+
> for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
> if (!adev->irq.client[i].sources)
> continue;
>--
>2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: Avoid null pointer in SMI event

2021-03-30 Thread Amber Lin
Power Management IP is initialized/enabled before KFD init. When a
thermal throttling happens before kfd_smi_init is done, calling the KFD
SMI update function causes a stack dump by referring a NULL pointer (
smi_clients list). Check if kfd_init is completed before calling the
function.

Signed-off-by: Amber Lin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 72c893fff61a..3cd46d7190b3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1297,7 +1297,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
 
 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
 {
-   if (kfd)
+   if (kfd && kfd->init_complete)
kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
 }
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Interlaced resolutions hang the desktop

2021-03-30 Thread Alberto Salvia Novella
The frame-rate at 24Hz is extremely poor for normal desktop usage.

If the highest resolution, aka 1080p, uses that refresh rate then the
desktop will default to that frame-rate.

Other progressive modes don't exhibit any issue.

On Tue, 30 Mar 2021 at 18:26, Christian König 
wrote:

> Hi Alberto,
>
> I think the driver should only support resolutions that are *progressive*,
> but also at least of *50Hz*.
>
>
> Why do you think so?, the 24Hz resolution seems to be the native one of
> the display.
>
> Regards,
> Christian.
>
> Am 30.03.21 um 17:37 schrieb Alberto Salvia Novella:
>
> This is why I'm using interlaced:
>
> $ *xrandr*
> Screen 0: minimum 320 x 200, current 1920 x 1080, maximum 8192 x 8192
> DisplayPort-0 disconnected (normal left inverted right x axis y axis)
> HDMI-0 connected primary 1920x1080+0+0 (normal left inverted right x axis
> y axis) 16mm x 9mm
>1920x*1080i*60.00*+  50.0059.94
>1920x1080 *24.00*23.98
>1280x*720*  60.0050.0059.94
>1024x768  75.0370.0760.00
>832x624   74.55
>800x600   72.1975.0060.3256.25
>720x576   50.00
>720x576i  50.00
>720x480   60.0059.94
>720x480i  60.0059.94
>640x480   75.0072.8166.6760.0059.94
>720x400   70.08
> DVI-0 disconnected (normal left inverted right x axis y axis)
>
> I think the driver should only support resolutions that are *progressive*,
> but also at least of *50Hz*.
>
> On Tue, 30 Mar 2021 at 15:41, Christian König <
> ckoenig.leichtzumer...@gmail.com> wrote:
>
>> Mhm, no idea why an interlaced resolution would cause a crash. Maybe some
>> miscalculation in the display code.
>>
>> But apart from that if you just connected your PC to a TV I also wouldn't
>> recommend using an interlaced resolution in the first place.
>>
>> See those resolutions only exists for backward compatibility with analog
>> hardware.
>>
>> I think we would just disable those modes instead of searching for the
>> bug.
>>
>> Regards,
>> Christian.
>>
>> Am 30.03.21 um 11:07 schrieb Alberto Salvia Novella:
>>
>> I guessed so.
>>
>> The GPU is a Radeon HD5870, and the screen is an old Telefunken TV
>> (TLFK22LEDPVR1).
>>
>> Since my real display got into repair I used this TV meanwhile, and to my
>> surprise it froze the system.
>>
>> On Tue, 30 Mar 2021 at 10:15, Christian König 
>> wrote:
>>
>>> Hi Alberto,
>>>
>>> well what hardware do you have?
>>>
>>> Interlaced resolutions are not used any more on modern hardware, so they
>>> are not well tested.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 30.03.21 um 10:04 schrieb Alberto Salvia Novella:
>>> > The entire desktop hangs after some minutes when using the module
>>> > "radeon" with an interlaced resolution.
>>> >
>>> > Easier to trigger by playing a video on Firefox, at least on kwin_x11.
>>> > Wayland didn't exhibit the problem.
>>> >
>>> > Other display drivers, from different computers I have tried, didn't
>>> > allow those interlaced resolutions all together. It seems they know
>>> > there will be problems.
>>>
>>>
>> ___
>> amd-gfx mailing 
>> listamd-gfx@lists.freedesktop.orghttps://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>  
>> 
>>
>>
>>
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Use pr_debug in DM to prevent dmesg flooding

2021-03-30 Thread Harry Wentland




On 2021-03-30 1:22 p.m., Victor Lu wrote:

[why]
Enabling drm.debug=0x4 can flood the dmesg due to prints on every cursor
update or page flip.



Maybe mention something like this: "Our CI enables drm.debug=0x4 logs 
and is now getting spammed with cursor updates. We probably want to 
avoid spamming the log when DRM_DEBUG_KMS."


The change is
Reviewed-by: Harry Wentland 

Harry


[how]
Define and use pr_debug macros instead of a few spammy DRM_DEBUG_*'s.

Signed-off-by: Victor Lu 
---
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 20 +--
  .../drm/amd/display/include/logger_types.h|  3 +++
  2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 92cee957b424..04dbcbc7578d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -372,14 +372,14 @@ static void dm_pflip_high_irq(void *interrupt_params)
/* IRQ could occur when in initial stage */
/* TODO work and BO cleanup */
if (amdgpu_crtc == NULL) {
-   DRM_DEBUG_DRIVER("CRTC is null, returning.\n");
+   DC_LOG_PFLIP("CRTC is null, returning.\n");
return;
}
  
  	spin_lock_irqsave(_to_drm(adev)->event_lock, flags);
  
  	if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){

-   DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d 
!=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p] \n",
+   DC_LOG_PFLIP("amdgpu_crtc->pflip_status = %d 
!=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p] \n",
 amdgpu_crtc->pflip_status,
 AMDGPU_FLIP_SUBMITTED,
 amdgpu_crtc->crtc_id,
@@ -450,9 +450,9 @@ static void dm_pflip_high_irq(void *interrupt_params)
amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
spin_unlock_irqrestore(_to_drm(adev)->event_lock, flags);
  
-	DRM_DEBUG_KMS("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp %d\n",

- amdgpu_crtc->crtc_id, amdgpu_crtc,
- vrr_active, (int) !e);
+   DC_LOG_PFLIP("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp 
%d\n",
+amdgpu_crtc->crtc_id, amdgpu_crtc,
+vrr_active, (int) !e);
  }
  
  static void dm_vupdate_high_irq(void *interrupt_params)

@@ -482,7 +482,7 @@ static void dm_vupdate_high_irq(void *interrupt_params)
atomic64_set(_params->previous_timestamp, 
vblank->time);
}
  
-		DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d\n",

+   DC_LOG_VBLANK("crtc:%d, vupdate-vrr:%d\n",
  acrtc->crtc_id,
  vrr_active);
  
@@ -535,7 +535,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
  
  	vrr_active = amdgpu_dm_vrr_active_irq(acrtc);
  
-	DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,

+   DC_LOG_VBLANK("crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,
  vrr_active, acrtc->dm_irq_params.active_planes);
  
  	/**

@@ -7991,7 +7991,7 @@ static void handle_cursor_update(struct drm_plane *plane,
if (!plane->state->fb && !old_plane_state->fb)
return;
  
-	DRM_DEBUG_KMS("%s: crtc_id=%d with size %d to %d\n",

+   DC_LOG_CURSOR("%s: crtc_id=%d with size %d to %d\n",
  __func__,
  amdgpu_crtc->crtc_id,
  plane->state->crtc_w,
@@ -8053,8 +8053,8 @@ static void prepare_flip_isr(struct amdgpu_crtc *acrtc)
/* Mark this event as consumed */
acrtc->base.state->event = NULL;
  
-	DRM_DEBUG_KMS("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",

- acrtc->crtc_id);
+   DC_LOG_PFLIP("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",
+acrtc->crtc_id);
  }
  
  static void update_freesync_state_on_stream(

diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h 
b/drivers/gpu/drm/amd/display/include/logger_types.h
index 21bbee17c527..571fcf23cea9 100644
--- a/drivers/gpu/drm/amd/display/include/logger_types.h
+++ b/drivers/gpu/drm/amd/display/include/logger_types.h
@@ -36,6 +36,9 @@
  #define DC_LOG_DC(...) DRM_DEBUG_KMS(__VA_ARGS__)
  #define DC_LOG_DTN(...) DRM_DEBUG_KMS(__VA_ARGS__)
  #define DC_LOG_SURFACE(...) pr_debug("[SURFACE]:"__VA_ARGS__)
+#define DC_LOG_CURSOR(...) pr_debug("[CURSOR]:"__VA_ARGS__)
+#define DC_LOG_PFLIP(...) pr_debug("[PFLIP]:"__VA_ARGS__)
+#define DC_LOG_VBLANK(...) pr_debug("[VBLANK]:"__VA_ARGS__)
  #define DC_LOG_HW_HOTPLUG(...) DRM_DEBUG_KMS(__VA_ARGS__)
  #define DC_LOG_HW_LINK_TRAINING(...) 
pr_debug("[HW_LINK_TRAINING]:"__VA_ARGS__)
  #define DC_LOG_HW_SET_MODE(...) DRM_DEBUG_KMS(__VA_ARGS__)



___
amd-gfx mailing list

[PATCH 3/4] Revert "drm/amdgpu: workaround the TMR MC address issue (v2)"

2021-03-30 Thread Oak Zeng
This reverts commit 34a33d4683cba7ba63c894132efb1998c0217631.

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  9 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  | 10 --
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 21 ++---
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c  | 10 --
 4 files changed, 10 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index b244298..5f53d4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -209,15 +209,6 @@ struct amdgpu_gmc {
 */
u64 fb_start;
u64 fb_end;
-   /* In the case of use GART table for vmid0 FB access, [fb_start, fb_end]
-* will be squeezed to GART aperture. But we have a PSP FW issue to fix
-* for now. To temporarily workaround the PSP FW issue, added below two
-* variables to remember the original fb_start/end to re-enable FB
-* aperture to workaround the PSP FW issue. Will delete it after we
-* get a proper PSP FW fix.
-*/
-   u64 fb_start_original;
-   u64 fb_end_original;
unsignedvram_width;
u64 real_vram_size;
int vram_mtrr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 9e769cf..c555fa7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -407,16 +407,6 @@ static int psp_tmr_init(struct psp_context *psp)
  AMDGPU_GEM_DOMAIN_VRAM,
  >tmr_bo, >tmr_mc_addr, pptr);
 
-   /* workaround the tmr_mc_addr:
-* PSP requires an address in FB aperture. Right now driver produce
-* tmr_mc_addr in the GART aperture. Convert it back to FB aperture
-* for PSP. Will revert it after we get a fix from PSP FW.
-*/
-   if (psp->adev->asic_type == CHIP_ALDEBARAN) {
-   psp->tmr_mc_addr -= psp->adev->gmc.fb_start;
-   psp->tmr_mc_addr += psp->adev->gmc.fb_start_original;
-   }
-
return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 8c0710c..a245e8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -140,21 +140,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct 
amdgpu_device *adev)
 * FB aperture and AGP aperture. Disable them.
 */
if (adev->gmc.pdb0_bo) {
-   if (adev->asic_type == CHIP_ALDEBARAN) {
-   WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, 
adev->gmc.fb_end_original >> 24);
-   WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, 
adev->gmc.fb_start_original >> 24);
-   WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0);
-   WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFF);
-   WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 
adev->gmc.fb_start_original >> 18);
-   WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 
adev->gmc.fb_end_original >> 18);
-   } else {
-   WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, 0);
-   WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, 
0x00FF);
-   WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0);
-   WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFF);
-   WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 
0x3FFF);
-   WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 
0);
-   }
+   WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, 0);
+   WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, 0x00FF);
+   WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0);
+   WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFF);
+   WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 
0x3FFF);
+   WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index 2d8cf7f..7a1880d50 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -47,8 +47,6 @@ static u64 mmhub_v1_7_get_fb_location(struct amdgpu_device 
*adev)
 
adev->gmc.fb_start = base;
adev->gmc.fb_end = top;
-   adev->gmc.fb_start_original = base;
-   adev->gmc.fb_end_original = top;
 
return base;
 }
@@ -126,10 +124,10 @@ static void mmhub_v1_7_init_system_aperture_regs(struct 
amdgpu_device *adev)
if (adev->gmc.pdb0_bo) {
WREG32_SOC15(MMHUB, 0, 

[PATCH 2/4] drm/amdgpu: use macros to simplify codes

2021-03-30 Thread Oak Zeng
Use amdgpu_gmc_gpu_pa and amdgpu_gmc_cpu_pa
to simplify codes. No logic change.

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c   | 4 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 3 +--
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 3 +--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 3 +--
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 3 +--
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c  | 3 +--
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c  | 3 +--
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c  | 3 +--
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c  | 3 +--
 12 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 4c5c198..564446b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -205,7 +205,6 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
struct drm_gem_object *gobj = NULL;
struct amdgpu_bo *abo = NULL;
int ret;
-   unsigned long tmp;
 
memset(_cmd, 0, sizeof(mode_cmd));
mode_cmd.width = sizes->surface_width;
@@ -246,8 +245,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
 
info->fbops = _ops;
 
-   tmp = amdgpu_bo_gpu_offset(abo) - adev->gmc.vram_start;
-   info->fix.smem_start = adev->gmc.aper_base + tmp;
+   info->fix.smem_start = amdgpu_gmc_cpu_pa(adev, abo);
info->fix.smem_len = amdgpu_bo_size(abo);
info->screen_base = amdgpu_bo_kptr(abo);
info->screen_size = amdgpu_bo_size(abo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index b9d68fd..ca659e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -641,8 +641,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
u64 vram_addr = adev->vm_manager.vram_base_offset -
adev->gmc.xgmi.physical_node_id * 
adev->gmc.xgmi.node_segment_size;
u64 vram_end = vram_addr + vram_size;
-   u64 gart_ptb_gpu_pa = amdgpu_bo_gpu_offset(adev->gart.bo) +
-   adev->vm_manager.vram_base_offset - adev->gmc.vram_start;
+   u64 gart_ptb_gpu_pa = amdgpu_gmc_gpu_pa(adev, adev->gart.bo);
 
flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
flags |= AMDGPU_PTE_WRITEABLE;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 5bb9856..8c0710c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -120,8 +120,7 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct 
amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
 
/* Set default page address. */
-   value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
-   adev->vm_manager.vram_base_offset;
+   value = amdgpu_gmc_gpu_pa(adev, adev->vram_scratch.robj);
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
 (u32)(value >> 12));
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index 2aecc6a..c2fa371c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -165,8 +165,7 @@ static void gfxhub_v2_0_init_system_aperture_regs(struct 
amdgpu_device *adev)
 max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
 
/* Set default page address. */
-   value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
-   + adev->vm_manager.vram_base_offset;
+   value = amdgpu_gmc_gpu_pa(adev, adev->vram_scratch.robj);
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
 (u32)(value >> 12));
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index 410fd3a..9b575eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -164,8 +164,7 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct 
amdgpu_device *adev)
 max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
 
/* Set default page address. */
-   value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
-   + adev->vm_manager.vram_base_offset;
+   value = amdgpu_gmc_gpu_pa(adev, adev->vram_scratch.robj);
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
 (u32)(value >> 12));

[PATCH 4/4] drm/amdgpu: Introduce new SETUP_TMR interface

2021-03-30 Thread Oak Zeng
This new interface passes both virtual and physical address
to PSP. It is backword compatible with old interface.

v2: use a macro to simply tmr physical address calc (Lijo)

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 12 +---
 drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 11 ++-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index c555fa7..77a9dd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -328,8 +328,12 @@ psp_cmd_submit_buf(struct psp_context *psp,
 
 static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
 struct psp_gfx_cmd_resp *cmd,
-uint64_t tmr_mc, uint32_t size)
+uint64_t tmr_mc, struct amdgpu_bo *tmr_bo)
 {
+   struct amdgpu_device *adev = psp->adev;
+   uint32_t size = amdgpu_bo_size(tmr_bo);
+   uint64_t tmr_pa = amdgpu_gmc_gpu_pa(adev, tmr_bo);
+
if (amdgpu_sriov_vf(psp->adev))
cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
else
@@ -337,6 +341,9 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
cmd->cmd.cmd_setup_tmr.buf_size = size;
+   cmd->cmd.cmd_setup_tmr.bitfield.virt_phy_addr = 1;
+   cmd->cmd.cmd_setup_tmr.system_phy_addr_lo = lower_32_bits(tmr_pa);
+   cmd->cmd.cmd_setup_tmr.system_phy_addr_hi = upper_32_bits(tmr_pa);
 }
 
 static void psp_prep_load_toc_cmd_buf(struct psp_gfx_cmd_resp *cmd,
@@ -456,8 +463,7 @@ static int psp_tmr_load(struct psp_context *psp)
if (!cmd)
return -ENOMEM;
 
-   psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr,
-amdgpu_bo_size(psp->tmr_bo));
+   psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo);
DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
 amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h 
b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index dd4d65f..96064c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -185,10 +185,19 @@ struct psp_gfx_cmd_setup_tmr
 uint32_tbuf_phy_addr_lo;   /* bits [31:0] of GPU Virtual 
address of TMR buffer (must be 4 KB aligned) */
 uint32_tbuf_phy_addr_hi;   /* bits [63:32] of GPU Virtual 
address of TMR buffer */
 uint32_tbuf_size;  /* buffer size in bytes (must be 
multiple of 4 KB) */
+union {
+   struct {
+   uint32_tsriov_enabled:1; /* whether the device runs 
under SR-IOV*/
+   uint32_tvirt_phy_addr:1; /* driver passes both virtual 
and physical address to PSP*/
+   uint32_treserved:30;
+   } bitfield;
+   uint32_ttmr_flags;
+};
+uint32_tsystem_phy_addr_lo;/* bits [31:0] of system 
physical address of TMR buffer (must be 4 KB aligned) */
+uint32_tsystem_phy_addr_hi;/* bits [63:32] of system 
physical address of TMR buffer */
 
 };
 
-
 /* FW types for GFX_CMD_ID_LOAD_IP_FW command. Limit 31. */
 enum psp_gfx_fw_type {
GFX_FW_TYPE_NONE= 0,/* */
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/4] drm/amdgpu: Macros for vram physical addr calculation

2021-03-30 Thread Oak Zeng
Add one macro to calculate BO's GPU physical address.
And another one to calculate BO's CPU physical address.

Signed-off-by: Oak Zeng 
Suggested-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 7e248a4..b244298 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -272,6 +272,9 @@ struct amdgpu_gmc {
 #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) 
(adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
 #define amdgpu_gmc_get_vm_pte(adev, mapping, flags) 
(adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
 #define amdgpu_gmc_get_vbios_fb_size(adev) 
(adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
+#define amdgpu_gmc_gpu_va2pa(adev, va) (va - (adev)->gmc.vram_start + 
(adev)->vm_manager.vram_base_offset)
+#define amdgpu_gmc_gpu_pa(adev, bo) amdgpu_gmc_gpu_va2pa(adev, 
amdgpu_bo_gpu_offset(bo))
+#define amdgpu_gmc_cpu_pa(adev, bo) (amdgpu_bo_gpu_offset(bo) - 
(adev)->gmc.vram_start + (adev)->gmc.aper_base)
 
 /**
  * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: Use pr_debug in DM to prevent dmesg flooding

2021-03-30 Thread Victor Lu
[why]
Enabling drm.debug=0x4 can flood the dmesg due to prints on every cursor
update or page flip.

[how]
Define and use pr_debug macros instead of a few spammy DRM_DEBUG_*'s.

Signed-off-by: Victor Lu 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 20 +--
 .../drm/amd/display/include/logger_types.h|  3 +++
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 92cee957b424..04dbcbc7578d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -372,14 +372,14 @@ static void dm_pflip_high_irq(void *interrupt_params)
/* IRQ could occur when in initial stage */
/* TODO work and BO cleanup */
if (amdgpu_crtc == NULL) {
-   DRM_DEBUG_DRIVER("CRTC is null, returning.\n");
+   DC_LOG_PFLIP("CRTC is null, returning.\n");
return;
}
 
spin_lock_irqsave(_to_drm(adev)->event_lock, flags);
 
if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
-   DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d 
!=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p] \n",
+   DC_LOG_PFLIP("amdgpu_crtc->pflip_status = %d 
!=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p] \n",
 amdgpu_crtc->pflip_status,
 AMDGPU_FLIP_SUBMITTED,
 amdgpu_crtc->crtc_id,
@@ -450,9 +450,9 @@ static void dm_pflip_high_irq(void *interrupt_params)
amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
spin_unlock_irqrestore(_to_drm(adev)->event_lock, flags);
 
-   DRM_DEBUG_KMS("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp 
%d\n",
- amdgpu_crtc->crtc_id, amdgpu_crtc,
- vrr_active, (int) !e);
+   DC_LOG_PFLIP("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp 
%d\n",
+amdgpu_crtc->crtc_id, amdgpu_crtc,
+vrr_active, (int) !e);
 }
 
 static void dm_vupdate_high_irq(void *interrupt_params)
@@ -482,7 +482,7 @@ static void dm_vupdate_high_irq(void *interrupt_params)
atomic64_set(_params->previous_timestamp, 
vblank->time);
}
 
-   DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d\n",
+   DC_LOG_VBLANK("crtc:%d, vupdate-vrr:%d\n",
  acrtc->crtc_id,
  vrr_active);
 
@@ -535,7 +535,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
 
vrr_active = amdgpu_dm_vrr_active_irq(acrtc);
 
-   DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,
+   DC_LOG_VBLANK("crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,
  vrr_active, acrtc->dm_irq_params.active_planes);
 
/**
@@ -7991,7 +7991,7 @@ static void handle_cursor_update(struct drm_plane *plane,
if (!plane->state->fb && !old_plane_state->fb)
return;
 
-   DRM_DEBUG_KMS("%s: crtc_id=%d with size %d to %d\n",
+   DC_LOG_CURSOR("%s: crtc_id=%d with size %d to %d\n",
  __func__,
  amdgpu_crtc->crtc_id,
  plane->state->crtc_w,
@@ -8053,8 +8053,8 @@ static void prepare_flip_isr(struct amdgpu_crtc *acrtc)
/* Mark this event as consumed */
acrtc->base.state->event = NULL;
 
-   DRM_DEBUG_KMS("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",
- acrtc->crtc_id);
+   DC_LOG_PFLIP("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",
+acrtc->crtc_id);
 }
 
 static void update_freesync_state_on_stream(
diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h 
b/drivers/gpu/drm/amd/display/include/logger_types.h
index 21bbee17c527..571fcf23cea9 100644
--- a/drivers/gpu/drm/amd/display/include/logger_types.h
+++ b/drivers/gpu/drm/amd/display/include/logger_types.h
@@ -36,6 +36,9 @@
 #define DC_LOG_DC(...) DRM_DEBUG_KMS(__VA_ARGS__)
 #define DC_LOG_DTN(...) DRM_DEBUG_KMS(__VA_ARGS__)
 #define DC_LOG_SURFACE(...) pr_debug("[SURFACE]:"__VA_ARGS__)
+#define DC_LOG_CURSOR(...) pr_debug("[CURSOR]:"__VA_ARGS__)
+#define DC_LOG_PFLIP(...) pr_debug("[PFLIP]:"__VA_ARGS__)
+#define DC_LOG_VBLANK(...) pr_debug("[VBLANK]:"__VA_ARGS__)
 #define DC_LOG_HW_HOTPLUG(...) DRM_DEBUG_KMS(__VA_ARGS__)
 #define DC_LOG_HW_LINK_TRAINING(...) pr_debug("[HW_LINK_TRAINING]:"__VA_ARGS__)
 #define DC_LOG_HW_SET_MODE(...) DRM_DEBUG_KMS(__VA_ARGS__)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Interlaced resolutions hang the desktop

2021-03-30 Thread Christian König

Hi Alberto,

I think the driver should only support resolutions that are 
*progressive*, but also at least of *50Hz*.


Why do you think so?, the 24Hz resolution seems to be the native one of 
the display.


Regards,
Christian.

Am 30.03.21 um 17:37 schrieb Alberto Salvia Novella:

This is why I'm using interlaced:

$ *xrandr*
Screen 0: minimum 320 x 200, current 1920 x 1080, maximum 8192 x 8192
DisplayPort-0 disconnected (normal left inverted right x axis y axis)
HDMI-0 connected primary 1920x1080+0+0 (normal left inverted right x 
axis y axis) 16mm x 9mm

   1920x*1080i*    60.00*+  50.00    59.94
   1920x1080 *24.00*    23.98
   1280x*720*      60.00    50.00    59.94
   1024x768      75.03    70.07    60.00
   832x624       74.55
   800x600       72.19    75.00    60.32    56.25
   720x576       50.00
   720x576i      50.00
   720x480       60.00    59.94
   720x480i      60.00    59.94
   640x480       75.00    72.81    66.67    60.00    59.94
   720x400       70.08
DVI-0 disconnected (normal left inverted right x axis y axis)

I think the driver should only support resolutions that are 
*progressive*, but also at least of *50Hz*.


On Tue, 30 Mar 2021 at 15:41, Christian König 
> wrote:


Mhm, no idea why an interlaced resolution would cause a crash.
Maybe some miscalculation in the display code.

But apart from that if you just connected your PC to a TV I also
wouldn't recommend using an interlaced resolution in the first place.

See those resolutions only exists for backward compatibility with
analog hardware.

I think we would just disable those modes instead of searching for
the bug.

Regards,
Christian.

Am 30.03.21 um 11:07 schrieb Alberto Salvia Novella:

I guessed so.

The GPU is a Radeon HD5870, and the screen is an old Telefunken
TV (TLFK22LEDPVR1).

Since my real display got into repair I used this TV meanwhile,
and to my surprise it froze the system.

On Tue, 30 Mar 2021 at 10:15, Christian König
mailto:christian.koe...@amd.com>> wrote:

Hi Alberto,

well what hardware do you have?

Interlaced resolutions are not used any more on modern
hardware, so they
are not well tested.

Regards,
Christian.

Am 30.03.21 um 10:04 schrieb Alberto Salvia Novella:
> The entire desktop hangs after some minutes when using the
module
> "radeon" with an interlaced resolution.
>
> Easier to trigger by playing a video on Firefox, at least
on kwin_x11.
> Wayland didn't exhibit the problem.
>
> Other display drivers, from different computers I have
tried, didn't
> allow those interlaced resolutions all together. It seems
they know
> there will be problems.


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org  
https://lists.freedesktop.org/mailman/listinfo/amd-gfx  





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/2] ensure alignment on CPU page for bo mapping

2021-03-30 Thread Christian König
Reviewed-by: Christian König  for the entire 
series.


Alex will probably pick them up for the next feature pull request.

Regards,
Christian.

Am 30.03.21 um 17:33 schrieb Xℹ Ruoyao:

In AMDGPU driver, the bo mapping should always align to CPU page or
the page table is corrupted.

The first patch is cherry-picked from Loongson community, which sets a
suitable dev_info.gart_page_size so Mesa will handle the alignment
correctly.

The second patch is added to ensure an ioctl with unaligned parameter to
be rejected -EINVAL, instead of causing page table corruption.

The patches should be applied for drm-next.

Huacai Chen (1):
   drm/amdgpu: Set a suitable dev_info.gart_page_size

Xℹ Ruoyao (1):
   drm/amdgpu: check alignment on CPU page for bo map

  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 8 
  2 files changed, 6 insertions(+), 6 deletions(-)


base-commit: a0c8b193bfe81cc8e9c7c162bb8d777ba12596f0


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/4] drm/amdgpu: Macros for vram physical addr calculation

2021-03-30 Thread Christian König

Well I send a review?

Christian.

Am 30.03.21 um 17:04 schrieb Zeng, Oak:

[AMD Official Use Only - Internal Distribution Only]

Ping, can someone help review this series?

Regards,
Oak



On 2021-03-25, 12:38 PM, "Zeng, Oak"  wrote:

 Add one macro to calculate BO's GPU physical address.
 And another one to calculate BO's CPU physical address.

 Signed-off-by: Oak Zeng 
 Suggested-by: Lijo Lazar 
 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 3 +++
  1 file changed, 3 insertions(+)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
 index 2ee8d1b..7cd9d34 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
 @@ -283,6 +283,9 @@ struct amdgpu_gmc {
  #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) 
(adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
  #define amdgpu_gmc_get_vm_pte(adev, mapping, flags) 
(adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
  #define amdgpu_gmc_get_vbios_fb_size(adev) 
(adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
 +#define amdgpu_gmc_gpu_va2pa(adev, va) (va - (adev)->gmc.vram_start + 
(adev)->vm_manager.vram_base_offset)
 +#define amdgpu_gmc_gpu_pa(adev, bo) amdgpu_gmc_gpu_va2pa(adev, 
amdgpu_bo_gpu_offset(bo))
 +#define amdgpu_gmc_cpu_pa(adev, bo) (amdgpu_bo_gpu_offset(bo) - 
(adev)->gmc.vram_start + (adev)->gmc.aper_base)

  /**
   * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through 
the BAR
 --
 2.7.4


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Interlaced resolutions hang the desktop

2021-03-30 Thread Alex Deucher
On Tue, Mar 30, 2021 at 12:06 PM Alberto Salvia Novella
 wrote:
>
> This is why I'm using interlaced:
>
> $ xrandr
> Screen 0: minimum 320 x 200, current 1920 x 1080, maximum 8192 x 8192
> DisplayPort-0 disconnected (normal left inverted right x axis y axis)
> HDMI-0 connected primary 1920x1080+0+0 (normal left inverted right x axis y 
> axis) 16mm x 9mm
>1920x1080i60.00*+  50.0059.94
>1920x1080 24.0023.98
>1280x720  60.0050.0059.94
>1024x768  75.0370.0760.00
>832x624   74.55
>800x600   72.1975.0060.3256.25
>720x576   50.00
>720x576i  50.00
>720x480   60.0059.94
>720x480i  60.0059.94
>640x480   75.0072.8166.6760.0059.94
>720x400   70.08
> DVI-0 disconnected (normal left inverted right x axis y axis)
>
> I think the driver should only support resolutions that are progressive, but 
> also at least of 50Hz.

The supported display modes are dictated by the monitor.  Do you still
have problems with progressive modes?  I'd hate to disable interlaced
modes if they are working fine for others.

Alex


>
> On Tue, 30 Mar 2021 at 15:41, Christian König 
>  wrote:
>>
>> Mhm, no idea why an interlaced resolution would cause a crash. Maybe some 
>> miscalculation in the display code.
>>
>> But apart from that if you just connected your PC to a TV I also wouldn't 
>> recommend using an interlaced resolution in the first place.
>>
>> See those resolutions only exists for backward compatibility with analog 
>> hardware.
>>
>> I think we would just disable those modes instead of searching for the bug.
>>
>> Regards,
>> Christian.
>>
>> Am 30.03.21 um 11:07 schrieb Alberto Salvia Novella:
>>
>> I guessed so.
>>
>> The GPU is a Radeon HD5870, and the screen is an old Telefunken TV 
>> (TLFK22LEDPVR1).
>>
>> Since my real display got into repair I used this TV meanwhile, and to my 
>> surprise it froze the system.
>>
>> On Tue, 30 Mar 2021 at 10:15, Christian König  
>> wrote:
>>>
>>> Hi Alberto,
>>>
>>> well what hardware do you have?
>>>
>>> Interlaced resolutions are not used any more on modern hardware, so they
>>> are not well tested.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 30.03.21 um 10:04 schrieb Alberto Salvia Novella:
>>> > The entire desktop hangs after some minutes when using the module
>>> > "radeon" with an interlaced resolution.
>>> >
>>> > Easier to trigger by playing a video on Firefox, at least on kwin_x11.
>>> > Wayland didn't exhibit the problem.
>>> >
>>> > Other display drivers, from different computers I have tried, didn't
>>> > allow those interlaced resolutions all together. It seems they know
>>> > there will be problems.
>>>
>>
>> ___
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 0/2] ensure alignment on CPU page for bo mapping

2021-03-30 Thread Xℹ Ruoyao
In AMDGPU driver, the bo mapping should always align to CPU page or
the page table is corrupted.

The first patch is cherry-picked from Loongson community, which sets a
suitable dev_info.gart_page_size so Mesa will handle the alignment
correctly.

The second patch is added to ensure an ioctl with unaligned parameter to
be rejected -EINVAL, instead of causing page table corruption.

The patches should be applied for drm-next.

Huacai Chen (1):
  drm/amdgpu: Set a suitable dev_info.gart_page_size

Xℹ Ruoyao (1):
  drm/amdgpu: check alignment on CPU page for bo map

 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 8 
 2 files changed, 6 insertions(+), 6 deletions(-)


base-commit: a0c8b193bfe81cc8e9c7c162bb8d777ba12596f0
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix an underflow on non-4KB-page systems

2021-03-30 Thread Xi Ruoyao
On 2021-03-30 15:24 +0200, Christian König wrote:
> Am 30.03.21 um 15:23 schrieb Dan Horák:
> > On Tue, 30 Mar 2021 21:09:12 +0800
> > Xi Ruoyao  wrote:
> > 
> > > On 2021-03-30 21:02 +0800, Xi Ruoyao wrote:
> > > > On 2021-03-30 14:55 +0200, Christian König wrote:
> > > > > I rather see this as a kernel bug. Can you test if this code fragment
> > > > > fixes your issue:
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > > index 64beb3399604..e1260b517e1b 100644
> > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > > > @@ -780,7 +780,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void
> > > > > *data, struct drm_file *filp)
> > > > >   }
> > > > >   dev_info->virtual_address_alignment =
> > > > > max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
> > > > >   dev_info->pte_fragment_size = (1 <<
> > > > > adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
> > > > > -   dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE;
> > > > > +   dev_info->gart_page_size =
> > > > > dev_info->virtual_address_alignment;
> > > > >   dev_info->cu_active_number = adev-
> > > > > >gfx.cu_info.number;
> > > > >   dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
> > > > >   dev_info->ce_ram_size = adev->gfx.ce_ram_size;
> > > > It works.  I've seen it at
> > > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fxen0n%2Flinux%2Fcommit%2F84ada72983838bd7ce54bc32f5d34ac5b5aae191data=04%7C01%7Cchristian.koenig%40amd.com%7Cf37fddf20a8847edf67808d8f37ef23c%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637527074118791321%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=DZnmee38NGpiWRMX5LmlxOhxAzBMhAusnAWNnCxXTJ0%3Dreserved=0
> > > > before (with a common sub-expression, though :).
> > > Some comment: on an old version of Fedora ported by Loongson, Xorg just
> > > hangs
> > > without this commit.  But on the system I built from source, I didn't see
> > > any
> > > issue before Linux 5.11.  So I misbelieved that it was something already
> > > fixed.
> > > 
> > > Dan: you can try it on your PPC 64 with non-4K page as well.
> > yup, looks good here as well, ppc64le (Power9) system with 64KB pages
> 
> Mhm, as far as I can say this patch never made it to us.

I think maybe its author considers it a "workaround" like me, as there is
already a "virtual_address_alignment" which seems correct.

> Can you please send it to the mailing list with me on CC?

I've sent it, together with my patch for using ~PAGE_MASK in parameter
validation.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: Set a suitable dev_info.gart_page_size

2021-03-30 Thread Xℹ Ruoyao
From: Huacai Chen 

In Mesa, dev_info.gart_page_size is used for alignment and it was
set to AMDGPU_GPU_PAGE_SIZE(4KB). However, the page table of AMDGPU
driver requires an alignment on CPU pages.  So, for non-4KB page system,
gart_page_size should be max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE).

Signed-off-by: Rui Wang 
Signed-off-by: Huacai Chen 
Link: https://github.com/loongson-community/linux-stable/commit/caa9c0a1
[Xi: rebased for drm-next, use max_t for checkpatch,
 and reworded commit message.]
Signed-off-by: Xi Ruoyao 
BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1549
Tested-by: Dan Horák 
Cc: sta...@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 86eeeb4f3513..3b0be64e4638 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -791,9 +791,9 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
dev_info->high_va_offset = AMDGPU_GMC_HOLE_END;
dev_info->high_va_max = AMDGPU_GMC_HOLE_END | vm_size;
}
-   dev_info->virtual_address_alignment = max((int)PAGE_SIZE, 
AMDGPU_GPU_PAGE_SIZE);
+   dev_info->virtual_address_alignment = max_t(u32, PAGE_SIZE, 
AMDGPU_GPU_PAGE_SIZE);
dev_info->pte_fragment_size = (1 << 
adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
-   dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE;
+   dev_info->gart_page_size = max_t(u32, PAGE_SIZE, 
AMDGPU_GPU_PAGE_SIZE);
dev_info->cu_active_number = adev->gfx.cu_info.number;
dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
dev_info->ce_ram_size = adev->gfx.ce_ram_size;
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Interlaced resolutions hang the desktop

2021-03-30 Thread Alberto Salvia Novella
This is why I'm using interlaced:

$ *xrandr*
Screen 0: minimum 320 x 200, current 1920 x 1080, maximum 8192 x 8192
DisplayPort-0 disconnected (normal left inverted right x axis y axis)
HDMI-0 connected primary 1920x1080+0+0 (normal left inverted right x axis y
axis) 16mm x 9mm
   1920x*1080i*60.00*+  50.0059.94
   1920x1080 *24.00*23.98
   1280x*720*  60.0050.0059.94
   1024x768  75.0370.0760.00
   832x624   74.55
   800x600   72.1975.0060.3256.25
   720x576   50.00
   720x576i  50.00
   720x480   60.0059.94
   720x480i  60.0059.94
   640x480   75.0072.8166.6760.0059.94
   720x400   70.08
DVI-0 disconnected (normal left inverted right x axis y axis)

I think the driver should only support resolutions that are *progressive*,
but also at least of *50Hz*.

On Tue, 30 Mar 2021 at 15:41, Christian König <
ckoenig.leichtzumer...@gmail.com> wrote:

> Mhm, no idea why an interlaced resolution would cause a crash. Maybe some
> miscalculation in the display code.
>
> But apart from that if you just connected your PC to a TV I also wouldn't
> recommend using an interlaced resolution in the first place.
>
> See those resolutions only exists for backward compatibility with analog
> hardware.
>
> I think we would just disable those modes instead of searching for the bug.
>
> Regards,
> Christian.
>
> Am 30.03.21 um 11:07 schrieb Alberto Salvia Novella:
>
> I guessed so.
>
> The GPU is a Radeon HD5870, and the screen is an old Telefunken TV
> (TLFK22LEDPVR1).
>
> Since my real display got into repair I used this TV meanwhile, and to my
> surprise it froze the system.
>
> On Tue, 30 Mar 2021 at 10:15, Christian König 
> wrote:
>
>> Hi Alberto,
>>
>> well what hardware do you have?
>>
>> Interlaced resolutions are not used any more on modern hardware, so they
>> are not well tested.
>>
>> Regards,
>> Christian.
>>
>> Am 30.03.21 um 10:04 schrieb Alberto Salvia Novella:
>> > The entire desktop hangs after some minutes when using the module
>> > "radeon" with an interlaced resolution.
>> >
>> > Easier to trigger by playing a video on Firefox, at least on kwin_x11.
>> > Wayland didn't exhibit the problem.
>> >
>> > Other display drivers, from different computers I have tried, didn't
>> > allow those interlaced resolutions all together. It seems they know
>> > there will be problems.
>>
>>
> ___
> amd-gfx mailing 
> listamd-gfx@lists.freedesktop.orghttps://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: check alignment on CPU page for bo map

2021-03-30 Thread Xℹ Ruoyao
The page table of AMDGPU requires an alignment to CPU page so we should
check ioctl parameters for it.  Return -EINVAL if some parameter is
unaligned to CPU page, instead of corrupt the page table sliently.

Signed-off-by: Xi Ruoyao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dc4d6ae71476..a01c158bc29f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2198,8 +2198,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
uint64_t eaddr;
 
/* validate the parameters */
-   if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
-   size == 0 || size & AMDGPU_GPU_PAGE_MASK)
+   if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
+   size == 0 || size & ~PAGE_MASK)
return -EINVAL;
 
/* make sure object fit at this offset */
@@ -2264,8 +2264,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
int r;
 
/* validate the parameters */
-   if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
-   size == 0 || size & AMDGPU_GPU_PAGE_MASK)
+   if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
+   size == 0 || size & ~PAGE_MASK)
return -EINVAL;
 
/* make sure object fit at this offset */
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RESEND 00/19] Rid GPU from W=1 warnings

2021-03-30 Thread Lee Jones
On Wed, 24 Mar 2021, Lee Jones wrote:

> Daniel,
> 
> > MIME-Version: 1.0
> > Content-Type: text/plain; charset=UTF-8
> > Content-Transfer-Encoding: 8bit
> > 
> > This is a resend of the remaining patches.
> > 
> > All of these patches have been sent before.
> 
> Are you still keen to 'hoover these up'?
> 
> Just leave the one that requires work and take the rest perhaps?

How would you like me to proceed with this?

-- 
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/4] drm/amdgpu: Macros for vram physical addr calculation

2021-03-30 Thread Zeng, Oak
[AMD Official Use Only - Internal Distribution Only]

Ping, can someone help review this series?

Regards,
Oak



On 2021-03-25, 12:38 PM, "Zeng, Oak"  wrote:

Add one macro to calculate BO's GPU physical address.
And another one to calculate BO's CPU physical address.

Signed-off-by: Oak Zeng 
Suggested-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 2ee8d1b..7cd9d34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -283,6 +283,9 @@ struct amdgpu_gmc {
 #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) 
(adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
 #define amdgpu_gmc_get_vm_pte(adev, mapping, flags) 
(adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
 #define amdgpu_gmc_get_vbios_fb_size(adev) 
(adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
+#define amdgpu_gmc_gpu_va2pa(adev, va) (va - (adev)->gmc.vram_start + 
(adev)->vm_manager.vram_base_offset)
+#define amdgpu_gmc_gpu_pa(adev, bo) amdgpu_gmc_gpu_va2pa(adev, 
amdgpu_bo_gpu_offset(bo))
+#define amdgpu_gmc_cpu_pa(adev, bo) (amdgpu_bo_gpu_offset(bo) - 
(adev)->gmc.vram_start + (adev)->gmc.aper_base)

 /**
  * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through 
the BAR
--
2.7.4


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/radeon: avoid potential null pointer access

2021-03-30 Thread Christian König

Am 30.03.21 um 16:04 schrieb Guchun Chen:

Leverage the same logic from amdgpu_ttm_tt_unpin_userptr.

Signed-off-by: Guchun Chen 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/radeon/radeon_ttm.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 37ac42d6740f..2a61cff325e4 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -415,7 +415,7 @@ static void radeon_ttm_tt_unpin_userptr(struct ttm_device 
*bdev, struct ttm_tt *
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
  
  	/* double check that we don't free the table twice */

-   if (!ttm->sg->sgl)
+   if (!ttm->sg || !ttm->sg->sgl)
return;
  
  	/* free the sg table and pages again */


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/radeon: avoid potential null pointer access

2021-03-30 Thread Guchun Chen
Leverage the same logic from amdgpu_ttm_tt_unpin_userptr.

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/radeon/radeon_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 37ac42d6740f..2a61cff325e4 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -415,7 +415,7 @@ static void radeon_ttm_tt_unpin_userptr(struct ttm_device 
*bdev, struct ttm_tt *
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
 
/* double check that we don't free the table twice */
-   if (!ttm->sg->sgl)
+   if (!ttm->sg || !ttm->sg->sgl)
return;
 
/* free the sg table and pages again */
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Interlaced resolutions hang the desktop

2021-03-30 Thread Christian König
Mhm, no idea why an interlaced resolution would cause a crash. Maybe 
some miscalculation in the display code.


But apart from that if you just connected your PC to a TV I also 
wouldn't recommend using an interlaced resolution in the first place.


See those resolutions only exists for backward compatibility with analog 
hardware.


I think we would just disable those modes instead of searching for the bug.

Regards,
Christian.

Am 30.03.21 um 11:07 schrieb Alberto Salvia Novella:

I guessed so.

The GPU is a Radeon HD5870, and the screen is an old Telefunken TV 
(TLFK22LEDPVR1).


Since my real display got into repair I used this TV meanwhile, and to 
my surprise it froze the system.


On Tue, 30 Mar 2021 at 10:15, Christian König 
mailto:christian.koe...@amd.com>> wrote:


Hi Alberto,

well what hardware do you have?

Interlaced resolutions are not used any more on modern hardware,
so they
are not well tested.

Regards,
Christian.

Am 30.03.21 um 10:04 schrieb Alberto Salvia Novella:
> The entire desktop hangs after some minutes when using the module
> "radeon" with an interlaced resolution.
>
> Easier to trigger by playing a video on Firefox, at least on
kwin_x11.
> Wayland didn't exhibit the problem.
>
> Other display drivers, from different computers I have tried,
didn't
> allow those interlaced resolutions all together. It seems they know
> there will be problems.


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix compiler warning(v2)

2021-03-30 Thread Christian König

Am 30.03.21 um 15:24 schrieb Guchun Chen:

warning: ISO C90 forbids mixed declarations and code 
[-Wdeclaration-after-statement]
   int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);

v2: put short variable declaration last

Signed-off-by: Guchun Chen 


I still don't know why you get a compiler warning, but it certainly 
improves the coding style.


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +---
  1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 722efd86718e..fbaa4c148cca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -823,11 +823,10 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device 
*bdev,
  {
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
-   int r;
-
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+   int r;
  
  	/* Allocate an SG array and squash pages into it */

r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
@@ -861,7 +860,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device 
*bdev,
  {
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
-
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix compiler warning(v2)

2021-03-30 Thread Guchun Chen
warning: ISO C90 forbids mixed declarations and code 
[-Wdeclaration-after-statement]
  int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);

v2: put short variable declaration last

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 722efd86718e..fbaa4c148cca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -823,11 +823,10 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device 
*bdev,
 {
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
-   int r;
-
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+   int r;
 
/* Allocate an SG array and squash pages into it */
r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
@@ -861,7 +860,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device 
*bdev,
 {
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
-
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix an underflow on non-4KB-page systems

2021-03-30 Thread Dan Horák
On Tue, 30 Mar 2021 14:55:01 +0200
Christian König  wrote:

> Am 30.03.21 um 14:04 schrieb Xi Ruoyao:
> > On 2021-03-30 03:40 +0800, Xi Ruoyao wrote:
> >> On 2021-03-29 21:36 +0200, Christian König wrote:
> >>> Am 29.03.21 um 21:27 schrieb Xi Ruoyao:
>  Hi Christian,
> 
>  I don't think there is any constraint implemented to ensure `num_entries 
>  %
>  AMDGPU_GPU_PAGES_IN_CPU_PAGE == 0`.  For example, in 
>  `amdgpu_vm_bo_map()`:
> 
>    /* validate the parameters */
>    if (saddr & AMDGPU_GPU_PAGE_MASK || offset & 
>  AMDGPU_GPU_PAGE_MASK
>    size == 0 || size & AMDGPU_GPU_PAGE_MASK)
>    return -EINVAL;
> 
>  /* snip */
> 
>    saddr /= AMDGPU_GPU_PAGE_SIZE;
>    eaddr /= AMDGPU_GPU_PAGE_SIZE;
> 
>  /* snip */
> 
>    mapping->start = saddr;
>    mapping->last = eaddr;
> 
> 
>  If we really want to ensure (mapping->last - mapping->start + 1) %
>  AMDGPU_GPU_PAGES_IN_CPU_PAGE == 0, then we should replace
>  "AMDGPU_GPU_PAGE_MASK"
>  in "validate the parameters" with "PAGE_MASK".
> > It should be "~PAGE_MASK", "PAGE_MASK" has an opposite convention of
> > "AMDGPU_GPU_PAGE_MASK" :(.
> >
> >>> Yeah, good point.
> >>>
>  I tried it and it broke userspace: Xorg startup fails with EINVAL with
>  this
>  change.
> >>> Well in theory it is possible that we always fill the GPUVM on a 4k
> >>> basis while the native page size of the CPU is larger. Let me double
> >>> check the code.
> > On my platform, there are two issues both causing the VM corruption.  One is
> > fixed by agd5f/linux@fe001e7.
> 
> What is fe001e7? A commit id? If yes then that is to short and I can't 
> find it.

it's a gitlab shortcut for
https://gitlab.freedesktop.org/agd5f/linux/-/commit/fe001e70a55d0378328612be1fdc3abfc68b9ccc


Dan
> 
> >Another is in Mesa from userspace:  it uses
> > `dev_info->gart_page_size` as the alignment, but the kernel AMDGPU driver
> > expects it to use `dev_info->virtual_address_alignment`.
> 
> Mhm, looking at the kernel code I would rather say Mesa is correct and 
> the kernel driver is broken.
> 
> The gart_page_size is limited by the CPU page size, but the 
> virtual_address_alignment isn't.
> 
> > If we can make the change to fill the GPUVM on a 4k basis, we can fix this 
> > issue
> > and make virtual_address_alignment = 4K.  Otherwise, we should fortify the
> > parameter validation, changing "AMDGPU_GPU_PAGE_MASK" to "~PAGE_MASK".  
> > Then the
> > userspace will just get an EINVAL, instead of a slient VM corruption.  And
> > someone should tell Mesa developers to fix the code in this case.
> 
> I rather see this as a kernel bug. Can you test if this code fragment 
> fixes your issue:
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 64beb3399604..e1260b517e1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -780,7 +780,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void 
> *data, struct drm_file *filp)
>      }
>      dev_info->virtual_address_alignment = 
> max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
>      dev_info->pte_fragment_size = (1 << 
> adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
> -   dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE;
> +   dev_info->gart_page_size = 
> dev_info->virtual_address_alignment;
>      dev_info->cu_active_number = adev->gfx.cu_info.number;
>      dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
>      dev_info->ce_ram_size = adev->gfx.ce_ram_size;
> 
> 
> Thanks,
> Christian.
> 
> > --
> > Xi Ruoyao 
> > School of Aerospace Science and Technology, Xidian University
> >
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix an underflow on non-4KB-page systems

2021-03-30 Thread Xi Ruoyao
On 2021-03-30 14:55 +0200, Christian König wrote:
> Am 30.03.21 um 14:04 schrieb Xi Ruoyao:
> > On 2021-03-30 03:40 +0800, Xi Ruoyao wrote:
> > > On 2021-03-29 21:36 +0200, Christian König wrote:
> > > > Am 29.03.21 um 21:27 schrieb Xi Ruoyao:
> > > > > Hi Christian,
> > > > > 
> > > > > I don't think there is any constraint implemented to ensure 
> > > > > `num_entries
> > > > > %
> > > > > AMDGPU_GPU_PAGES_IN_CPU_PAGE == 0`.  For example, in
> > > > > `amdgpu_vm_bo_map()`:
> > > > > 
> > > > >   /* validate the parameters */
> > > > >   if (saddr & AMDGPU_GPU_PAGE_MASK || offset &
> > > > > AMDGPU_GPU_PAGE_MASK
> > > > >   size == 0 || size & AMDGPU_GPU_PAGE_MASK)
> > > > >   return -EINVAL;
> > > > > 
> > > > > /* snip */
> > > > > 
> > > > >   saddr /= AMDGPU_GPU_PAGE_SIZE;
> > > > >   eaddr /= AMDGPU_GPU_PAGE_SIZE;
> > > > > 
> > > > > /* snip */
> > > > > 
> > > > >   mapping->start = saddr;
> > > > >   mapping->last = eaddr;
> > > > > 
> > > > > 
> > > > > If we really want to ensure (mapping->last - mapping->start + 1) %
> > > > > AMDGPU_GPU_PAGES_IN_CPU_PAGE == 0, then we should replace
> > > > > "AMDGPU_GPU_PAGE_MASK"
> > > > > in "validate the parameters" with "PAGE_MASK".
> > It should be "~PAGE_MASK", "PAGE_MASK" has an opposite convention of
> > "AMDGPU_GPU_PAGE_MASK" :(.
> > 
> > > > Yeah, good point.
> > > > 
> > > > > I tried it and it broke userspace: Xorg startup fails with EINVAL with
> > > > > this
> > > > > change.
> > > > Well in theory it is possible that we always fill the GPUVM on a 4k
> > > > basis while the native page size of the CPU is larger. Let me double
> > > > check the code.
> > On my platform, there are two issues both causing the VM corruption.  One is
> > fixed by agd5f/linux@fe001e7.
> 
> What is fe001e7? A commit id? If yes then that is to short and I can't 
> find it.
> 
> >    Another is in Mesa from userspace:  it uses
> > `dev_info->gart_page_size` as the alignment, but the kernel AMDGPU driver
> > expects it to use `dev_info->virtual_address_alignment`.
> 
> Mhm, looking at the kernel code I would rather say Mesa is correct and 
> the kernel driver is broken.
> 
> The gart_page_size is limited by the CPU page size, but the 
> virtual_address_alignment isn't.
> 
> > If we can make the change to fill the GPUVM on a 4k basis, we can fix this
> > issue
> > and make virtual_address_alignment = 4K.  Otherwise, we should fortify the
> > parameter validation, changing "AMDGPU_GPU_PAGE_MASK" to "~PAGE_MASK".  Then
> > the
> > userspace will just get an EINVAL, instead of a slient VM corruption.  And
> > someone should tell Mesa developers to fix the code in this case.
> 
> I rather see this as a kernel bug. Can you test if this code fragment 
> fixes your issue:
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 64beb3399604..e1260b517e1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -780,7 +780,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void 
> *data, struct drm_file *filp)
>  }
>  dev_info->virtual_address_alignment = 
> max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
>  dev_info->pte_fragment_size = (1 << 
> adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
> -   dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE;
> +   dev_info->gart_page_size = 
> dev_info->virtual_address_alignment;
>  dev_info->cu_active_number = adev->gfx.cu_info.number;
>  dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
>  dev_info->ce_ram_size = adev->gfx.ce_ram_size;

It works.  I've seen it at
https://github.com/xen0n/linux/commit/84ada72983838bd7ce54bc32f5d34ac5b5aae191
before (with a common sub-expression, though :).

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 07/20] drm/print: Fixup DRM_DEBUG_KMS_RATELIMITED()

2021-03-30 Thread Robert Foss
Hey Lyude,

I'm seeing no issues with this patch and the reasoning behind the
patch is sound to me.

Reviewed-by: Robert Foss 

On Fri, 26 Mar 2021 at 21:39, Lyude Paul  wrote:
>
> Since we're about to move drm_dp_helper.c over to drm_dbg_*(), we'll want
> to make sure that we can also add ratelimited versions of these macros in
> order to retain some of the previous debugging output behavior we had.
>
> However, as I was preparing to do this I noticed that the current
> rate limited macros we have are kind of bogus. It looks like when I wrote
> these, I didn't notice that we'd always be calling __ratelimit() even if
> the debugging message we'd be printing would normally be filtered out due
> to the relevant DRM debugging category being disabled.
>
> So, let's fix this by making sure to check drm_debug_enabled() in our
> ratelimited macros before calling __ratelimit(), and start using
> drm_dev_printk() in order to print debugging messages since that will save
> us from doing a redundant drm_debug_enabled() check. And while we're at it,
> let's move the code for this into another macro that we can reuse for
> defining new ratelimited DRM debug macros more easily.
>
> v2:
> * Make sure to use tabs where possible in __DRM_DEFINE_DBG_RATELIMITED()
>
> Signed-off-by: Lyude Paul 
> Cc: Robert Foss 
> ---
>  include/drm/drm_print.h | 20 
>  1 file changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
> index f32d179e139d..a3c58c941bdc 100644
> --- a/include/drm/drm_print.h
> +++ b/include/drm/drm_print.h
> @@ -524,16 +524,20 @@ void __drm_err(const char *format, ...);
>  #define DRM_DEBUG_DP(fmt, ...) \
> __drm_dbg(DRM_UT_DP, fmt, ## __VA_ARGS__)
>
> -
> -#define DRM_DEBUG_KMS_RATELIMITED(fmt, ...)\
> -({ \
> -   static DEFINE_RATELIMIT_STATE(_rs,  \
> - DEFAULT_RATELIMIT_INTERVAL,   \
> - DEFAULT_RATELIMIT_BURST); \
> -   if (__ratelimit(&_rs))  \
> -   drm_dev_dbg(NULL, DRM_UT_KMS, fmt, ##__VA_ARGS__);  \
> +#define __DRM_DEFINE_DBG_RATELIMITED(category, drm, fmt, ...)
>   \
> +({   
>   \
> +   static DEFINE_RATELIMIT_STATE(rs_, DEFAULT_RATELIMIT_INTERVAL, 
> DEFAULT_RATELIMIT_BURST);\
> +   const struct drm_device *drm_ = (drm);
>   \
> + 
>   \
> +   if (drm_debug_enabled(DRM_UT_ ## category) && __ratelimit(_))  
>   \
> +   drm_dev_printk(drm_ ? drm_->dev : NULL, KERN_DEBUG, fmt, ## 
> __VA_ARGS__);   \
>  })
>
> +#define drm_dbg_kms_ratelimited(drm, fmt, ...) \
> +   __DRM_DEFINE_DBG_RATELIMITED(KMS, drm, fmt, ## __VA_ARGS__)
> +
> +#define DRM_DEBUG_KMS_RATELIMITED(fmt, ...) drm_dbg_kms_ratelimited(NULL, 
> fmt, ## __VA_ARGS__)
> +
>  /*
>   * struct drm_device based WARNs
>   *
> --
> 2.30.2
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix an underflow on non-4KB-page systems

2021-03-30 Thread Xi Ruoyao
On 2021-03-30 03:40 +0800, Xi Ruoyao wrote:
> On 2021-03-29 21:36 +0200, Christian König wrote:
> > Am 29.03.21 um 21:27 schrieb Xi Ruoyao:
> > > Hi Christian,
> > > 
> > > I don't think there is any constraint implemented to ensure `num_entries %
> > > AMDGPU_GPU_PAGES_IN_CPU_PAGE == 0`.  For example, in `amdgpu_vm_bo_map()`:
> > > 
> > >  /* validate the parameters */
> > >  if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK
> > > > > 
> > >  size == 0 || size & AMDGPU_GPU_PAGE_MASK)
> > >  return -EINVAL;
> > > 
> > > /* snip */
> > > 
> > >  saddr /= AMDGPU_GPU_PAGE_SIZE;
> > >  eaddr /= AMDGPU_GPU_PAGE_SIZE;
> > > 
> > > /* snip */
> > > 
> > >  mapping->start = saddr;
> > >  mapping->last = eaddr;
> > > 
> > > 
> > > If we really want to ensure (mapping->last - mapping->start + 1) %
> > > AMDGPU_GPU_PAGES_IN_CPU_PAGE == 0, then we should replace
> > > "AMDGPU_GPU_PAGE_MASK"
> > > in "validate the parameters" with "PAGE_MASK".

It should be "~PAGE_MASK", "PAGE_MASK" has an opposite convention of
"AMDGPU_GPU_PAGE_MASK" :(.

> > Yeah, good point.
> > 
> > > I tried it and it broke userspace: Xorg startup fails with EINVAL with
> > > this
> > > change.
> > 
> > Well in theory it is possible that we always fill the GPUVM on a 4k 
> > basis while the native page size of the CPU is larger. Let me double 
> > check the code.

On my platform, there are two issues both causing the VM corruption.  One is
fixed by agd5f/linux@fe001e7.  Another is in Mesa from userspace:  it uses
`dev_info->gart_page_size` as the alignment, but the kernel AMDGPU driver
expects it to use `dev_info->virtual_address_alignment`.

If we can make the change to fill the GPUVM on a 4k basis, we can fix this issue
and make virtual_address_alignment = 4K.  Otherwise, we should fortify the
parameter validation, changing "AMDGPU_GPU_PAGE_MASK" to "~PAGE_MASK".  Then the
userspace will just get an EINVAL, instead of a slient VM corruption.  And
someone should tell Mesa developers to fix the code in this case.
--
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 19/20] drm/dp_mst: Drop DRM_ERROR() on kzalloc() fail in drm_dp_mst_handle_up_req()

2021-03-30 Thread Robert Foss
Hey Lyude,

This patch looks good to me.

Reviewed-by: Robert Foss 

On Fri, 26 Mar 2021 at 21:40, Lyude Paul  wrote:
>
> Checkpatch was complaining about this - there's no need for us to print
> errors when kzalloc() fails, as kzalloc() will already WARN for us. So,
> let's fix that before converting things to make checkpatch happy.
>
> Signed-off-by: Lyude Paul 
> Cc: Robert Foss 
> ---
>  drivers/gpu/drm/drm_dp_mst_topology.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
> b/drivers/gpu/drm/drm_dp_mst_topology.c
> index ec2285595c33..74c420f5f204 100644
> --- a/drivers/gpu/drm/drm_dp_mst_topology.c
> +++ b/drivers/gpu/drm/drm_dp_mst_topology.c
> @@ -4113,10 +4113,9 @@ static int drm_dp_mst_handle_up_req(struct 
> drm_dp_mst_topology_mgr *mgr)
> return 0;
>
> up_req = kzalloc(sizeof(*up_req), GFP_KERNEL);
> -   if (!up_req) {
> -   DRM_ERROR("Not enough memory to process MST up req\n");
> +   if (!up_req)
> return -ENOMEM;
> -   }
> +
> INIT_LIST_HEAD(_req->next);
>
> drm_dp_sideband_parse_req(>up_req_recv, _req->msg);
> --
> 2.30.2
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix an underflow on non-4KB-page systems

2021-03-30 Thread Christian König



Am 30.03.21 um 15:23 schrieb Dan Horák:

On Tue, 30 Mar 2021 21:09:12 +0800
Xi Ruoyao  wrote:


On 2021-03-30 21:02 +0800, Xi Ruoyao wrote:

On 2021-03-30 14:55 +0200, Christian König wrote:

I rather see this as a kernel bug. Can you test if this code fragment
fixes your issue:

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 64beb3399604..e1260b517e1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -780,7 +780,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void
*data, struct drm_file *filp)
  }
  dev_info->virtual_address_alignment =
max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
  dev_info->pte_fragment_size = (1 <<
adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
-   dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE;
+   dev_info->gart_page_size =
dev_info->virtual_address_alignment;
  dev_info->cu_active_number = adev->gfx.cu_info.number;
  dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
  dev_info->ce_ram_size = adev->gfx.ce_ram_size;

It works.  I've seen it at
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fxen0n%2Flinux%2Fcommit%2F84ada72983838bd7ce54bc32f5d34ac5b5aae191data=04%7C01%7Cchristian.koenig%40amd.com%7Cf37fddf20a8847edf67808d8f37ef23c%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637527074118791321%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=DZnmee38NGpiWRMX5LmlxOhxAzBMhAusnAWNnCxXTJ0%3Dreserved=0
before (with a common sub-expression, though :).

Some comment: on an old version of Fedora ported by Loongson, Xorg just hangs
without this commit.  But on the system I built from source, I didn't see any
issue before Linux 5.11.  So I misbelieved that it was something already fixed.

Dan: you can try it on your PPC 64 with non-4K page as well.

yup, looks good here as well, ppc64le (Power9) system with 64KB pages


Mhm, as far as I can say this patch never made it to us.

Can you please send it to the mailing list with me on CC?

Thanks,
Christian.




Dan


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix an underflow on non-4KB-page systems

2021-03-30 Thread Xi Ruoyao
On 2021-03-30 21:02 +0800, Xi Ruoyao wrote:
> On 2021-03-30 14:55 +0200, Christian König wrote:
> > 
> > I rather see this as a kernel bug. Can you test if this code fragment 
> > fixes your issue:
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > index 64beb3399604..e1260b517e1b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > @@ -780,7 +780,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void 
> > *data, struct drm_file *filp)
> >  }
> >  dev_info->virtual_address_alignment = 
> > max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
> >  dev_info->pte_fragment_size = (1 << 
> > adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
> > -   dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE;
> > +   dev_info->gart_page_size = 
> > dev_info->virtual_address_alignment;
> >  dev_info->cu_active_number = adev->gfx.cu_info.number;
> >  dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
> >  dev_info->ce_ram_size = adev->gfx.ce_ram_size;
> 
> It works.  I've seen it at
> https://github.com/xen0n/linux/commit/84ada72983838bd7ce54bc32f5d34ac5b5aae191
> before (with a common sub-expression, though :).

Some comment: on an old version of Fedora ported by Loongson, Xorg just hangs
without this commit.  But on the system I built from source, I didn't see any
issue before Linux 5.11.  So I misbelieved that it was something already fixed.

Dan: you can try it on your PPC 64 with non-4K page as well.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 20/20] drm/dp_mst: Convert drm_dp_mst_topology.c to drm_err()/drm_dbg*()

2021-03-30 Thread Robert Foss
Hey Lyude,

This patch looks good, but I have one question below. With it
addressed, feel free to add my r-b.

Reviewed-by: Robert Foss 

>
> -static bool drm_dp_sideband_parse_req(struct drm_dp_sideband_msg_rx *raw,
> +static bool drm_dp_sideband_parse_req(const struct drm_dp_mst_topology_mgr 
> *mgr,
> + struct drm_dp_sideband_msg_rx *raw,
>   struct drm_dp_sideband_msg_req_body 
> *msg)
>  {
> memset(msg, 0, sizeof(*msg));
> @@ -1117,12 +1125,12 @@ static bool drm_dp_sideband_parse_req(struct 
> drm_dp_sideband_msg_rx *raw,
>
> switch (msg->req_type) {
> case DP_CONNECTION_STATUS_NOTIFY:
> -   return drm_dp_sideband_parse_connection_status_notify(raw, 
> msg);
> +   return drm_dp_sideband_parse_connection_status_notify(mgr, 
> raw, msg);
> case DP_RESOURCE_STATUS_NOTIFY:
> -   return drm_dp_sideband_parse_resource_status_notify(raw, msg);
> +   return drm_dp_sideband_parse_resource_status_notify(mgr, raw, 
> msg);
> default:
> -   DRM_ERROR("Got unknown request 0x%02x (%s)\n", msg->req_type,
> - drm_dp_mst_req_type_str(msg->req_type));
> +   drm_err(mgr->dev, "Got unknown request 0x%02x (%s)\n",
> +   msg->req_type, 
> drm_dp_mst_req_type_str(msg->req_type));
> return false;
> }
>  }
>

.. snip ..

> @@ -4118,12 +4121,12 @@ static int drm_dp_mst_handle_up_req(struct 
> drm_dp_mst_topology_mgr *mgr)
>
> INIT_LIST_HEAD(_req->next);
>
> -   drm_dp_sideband_parse_req(>up_req_recv, _req->msg);
> +   drm_dp_sideband_parse_req(mgr, >up_req_recv, _req->msg);

drm_dp_sideband_parse_req() is only called here, and the function
arguments could probably stand to have `>up_req_recv` removed
(here and in the func. declaration) since the same data structure is
accessible through the `mgr` pointer inside of
drm_dp_sideband_parse_req(). I guess this is a matter of taste, so
feel free to do what you want with this.

>
> if (up_req->msg.req_type != DP_CONNECTION_STATUS_NOTIFY &&
> up_req->msg.req_type != DP_RESOURCE_STATUS_NOTIFY) {
> -   DRM_DEBUG_KMS("Received unknown up req type, ignoring: %x\n",
> - up_req->msg.req_type);
> +   drm_dbg_kms(mgr->dev, "Received unknown up req type, 
> ignoring: %x\n",
> +   up_req->msg.req_type);
> kfree(up_req);
> goto out;
> }
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Interlaced resolutions hang the desktop

2021-03-30 Thread Alberto Salvia Novella
The entire desktop hangs after some minutes when using the module "radeon"
with an interlaced resolution.

Easier to trigger by playing a video on Firefox, at least on kwin_x11.
Wayland didn't exhibit the problem.

Other display drivers, from different computers I have tried, didn't allow
those interlaced resolutions all together. It seems they know there will be
problems.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 17/20] drm/dp: Convert drm_dp_helper.c to using drm_err/drm_dbg_*()

2021-03-30 Thread Robert Foss
Hey Lyude,

Looks good to me.

Reviewed-by: Robert Foss 


On Fri, 26 Mar 2021 at 21:40, Lyude Paul  wrote:
>
> Now that we've added a back-pointer to drm_device to drm_dp_aux, made
> drm_dp_aux available to any functions in drm_dp_helper.c which need to
> print to the kernel log, and ensured all of our logging uses a consistent
> format, let's do the final step of the conversion and actually move
> everything over to using drm_err() and drm_dbg_*().
>
> This was done by using the following cocci script:
>
>   @@
>   expression list expr;
>   @@
>
>   (
>   - DRM_DEBUG_KMS(expr);
>   + drm_dbg_kms(aux->drm_dev, expr);
>   |
>   - DRM_DEBUG_DP(expr);
>   + drm_dbg_dp(aux->drm_dev, expr);
>   |
>   - DRM_DEBUG_ATOMIC(expr);
>   + drm_dbg_atomic(aux->drm_dev, expr);
>   |
>   - DRM_DEBUG_KMS_RATELIMITED(expr);
>   + drm_dbg_kms_ratelimited(aux->drm_dev, expr);
>   |
>   - DRM_ERROR(expr);
>   + drm_err(aux->drm_dev, expr);
>   )
>
> Followed by correcting the resulting line-wrapping in the results by hand.
>
> v2:
> * Fix indenting in drm_dp_dump_access
>
> Signed-off-by: Lyude Paul 
> Cc: Robert Foss 
> ---
>  drivers/gpu/drm/drm_dp_helper.c | 121 
>  1 file changed, 59 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
> index 54e19d7b9c51..4940db0bcaae 100644
> --- a/drivers/gpu/drm/drm_dp_helper.c
> +++ b/drivers/gpu/drm/drm_dp_helper.c
> @@ -139,8 +139,8 @@ void drm_dp_link_train_clock_recovery_delay(const struct 
> drm_dp_aux *aux,
>  DP_TRAINING_AUX_RD_MASK;
>
> if (rd_interval > 4)
> -   DRM_DEBUG_KMS("%s: AUX interval %lu, out of range (max 4)\n",
> - aux->name, rd_interval);
> +   drm_dbg_kms(aux->drm_dev, "%s: AUX interval %lu, out of range 
> (max 4)\n",
> +   aux->name, rd_interval);
>
> if (rd_interval == 0 || dpcd[DP_DPCD_REV] >= DP_DPCD_REV_14)
> rd_interval = 100;
> @@ -155,8 +155,8 @@ static void __drm_dp_link_train_channel_eq_delay(const 
> struct drm_dp_aux *aux,
>  unsigned long rd_interval)
>  {
> if (rd_interval > 4)
> -   DRM_DEBUG_KMS("%s: AUX interval %lu, out of range (max 4)\n",
> - aux->name, rd_interval);
> +   drm_dbg_kms(aux->drm_dev, "%s: AUX interval %lu, out of range 
> (max 4)\n",
> +   aux->name, rd_interval);
>
> if (rd_interval == 0)
> rd_interval = 400;
> @@ -220,11 +220,11 @@ drm_dp_dump_access(const struct drm_dp_aux *aux,
> const char *arrow = request == DP_AUX_NATIVE_READ ? "->" : "<-";
>
> if (ret > 0)
> -   DRM_DEBUG_DP("%s: 0x%05x AUX %s (ret=%3d) %*ph\n",
> -aux->name, offset, arrow, ret, min(ret, 20), 
> buffer);
> +   drm_dbg_dp(aux->drm_dev, "%s: 0x%05x AUX %s (ret=%3d) %*ph\n",
> +  aux->name, offset, arrow, ret, min(ret, 20), 
> buffer);
> else
> -   DRM_DEBUG_DP("%s: 0x%05x AUX %s (ret=%3d)\n",
> -aux->name, offset, arrow, ret);
> +   drm_dbg_dp(aux->drm_dev, "%s: 0x%05x AUX %s (ret=%3d)\n",
> +  aux->name, offset, arrow, ret);
>  }
>
>  /**
> @@ -287,8 +287,8 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 
> request,
> err = ret;
> }
>
> -   DRM_DEBUG_KMS("%s: Too many retries, giving up. First error: %d\n",
> - aux->name, err);
> +   drm_dbg_kms(aux->drm_dev, "%s: Too many retries, giving up. First 
> error: %d\n",
> +   aux->name, err);
> ret = err;
>
>  unlock:
> @@ -524,44 +524,44 @@ bool drm_dp_send_real_edid_checksum(struct drm_dp_aux 
> *aux,
>
> if (drm_dp_dpcd_read(aux, DP_DEVICE_SERVICE_IRQ_VECTOR,
>  _test_req, 1) < 1) {
> -   DRM_ERROR("%s: DPCD failed read at register 0x%x\n",
> - aux->name, DP_DEVICE_SERVICE_IRQ_VECTOR);
> +   drm_err(aux->drm_dev, "%s: DPCD failed read at register 
> 0x%x\n",
> +   aux->name, DP_DEVICE_SERVICE_IRQ_VECTOR);
> return false;
> }
> auto_test_req &= DP_AUTOMATED_TEST_REQUEST;
>
> if (drm_dp_dpcd_read(aux, DP_TEST_REQUEST, _edid_read, 1) < 1) {
> -   DRM_ERROR("%s: DPCD failed read at register 0x%x\n",
> - aux->name, DP_TEST_REQUEST);
> +   drm_err(aux->drm_dev, "%s: DPCD failed read at register 
> 0x%x\n",
> +   aux->name, DP_TEST_REQUEST);
> return false;
> }
> link_edid_read &= DP_TEST_LINK_EDID_READ;
>
> if (!auto_test_req || !link_edid_read) {
> -   DRM_DEBUG_KMS("%s: Source DUT does not 

Re: [PATCH] drm/amdgpu: fix an underflow on non-4KB-page systems

2021-03-30 Thread Dan Horák
On Tue, 30 Mar 2021 21:09:12 +0800
Xi Ruoyao  wrote:

> On 2021-03-30 21:02 +0800, Xi Ruoyao wrote:
> > On 2021-03-30 14:55 +0200, Christian König wrote:
> > > 
> > > I rather see this as a kernel bug. Can you test if this code fragment 
> > > fixes your issue:
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > index 64beb3399604..e1260b517e1b 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > @@ -780,7 +780,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void 
> > > *data, struct drm_file *filp)
> > >  }
> > >  dev_info->virtual_address_alignment = 
> > > max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
> > >  dev_info->pte_fragment_size = (1 << 
> > > adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
> > > -   dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE;
> > > +   dev_info->gart_page_size = 
> > > dev_info->virtual_address_alignment;
> > >  dev_info->cu_active_number = adev->gfx.cu_info.number;
> > >  dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
> > >  dev_info->ce_ram_size = adev->gfx.ce_ram_size;
> > 
> > It works.  I've seen it at
> > https://github.com/xen0n/linux/commit/84ada72983838bd7ce54bc32f5d34ac5b5aae191
> > before (with a common sub-expression, though :).
> 
> Some comment: on an old version of Fedora ported by Loongson, Xorg just hangs
> without this commit.  But on the system I built from source, I didn't see any
> issue before Linux 5.11.  So I misbelieved that it was something already 
> fixed.
> 
> Dan: you can try it on your PPC 64 with non-4K page as well.

yup, looks good here as well, ppc64le (Power9) system with 64KB pages


Dan
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Interlaced resolutions hang the desktop

2021-03-30 Thread Alberto Salvia Novella
I guessed so.

The GPU is a Radeon HD5870, and the screen is an old Telefunken TV
(TLFK22LEDPVR1).

Since my real display got into repair I used this TV meanwhile, and to my
surprise it froze the system.

On Tue, 30 Mar 2021 at 10:15, Christian König 
wrote:

> Hi Alberto,
>
> well what hardware do you have?
>
> Interlaced resolutions are not used any more on modern hardware, so they
> are not well tested.
>
> Regards,
> Christian.
>
> Am 30.03.21 um 10:04 schrieb Alberto Salvia Novella:
> > The entire desktop hangs after some minutes when using the module
> > "radeon" with an interlaced resolution.
> >
> > Easier to trigger by playing a video on Firefox, at least on kwin_x11.
> > Wayland didn't exhibit the problem.
> >
> > Other display drivers, from different computers I have tried, didn't
> > allow those interlaced resolutions all together. It seems they know
> > there will be problems.
>
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: fix NULL pointer dereference

2021-03-30 Thread Christian König

Am 30.03.21 um 15:15 schrieb Chen, Guchun:

[AMD Public Use]

Thanks Christian, I will put laser focus on this patch after merging it.

I notice the same logic in radeon code radeon_ttm_tt_unpin_userptr. Shall I 
create another patch to fix it as well?


If you have time, then please do so. Cause those bugs are on my todo 
list for quite a while and I couldn't find time to fix them.


Regards,
Christian.



Regards,
Guchun

-Original Message-
From: Christian König 
Sent: Tuesday, March 30, 2021 6:39 PM
To: Chen, Guchun ; amd-gfx@lists.freedesktop.org; Koenig, Christian 
; Zhang, Hawking 
Subject: Re: [PATCH 1/2] drm/amdgpu: fix NULL pointer dereference

Am 30.03.21 um 12:02 schrieb Guchun Chen:

ttm->sg needs to be checked before accessing its child member.

Call Trace:
   amdgpu_ttm_backend_destroy+0x12/0x70 [amdgpu]
   ttm_bo_cleanup_memtype_use+0x3a/0x60 [ttm]
   ttm_bo_release+0x17d/0x300 [ttm]
   amdgpu_bo_unref+0x1a/0x30 [amdgpu]
   amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x78b/0x8b0 [amdgpu]
   kfd_ioctl_alloc_memory_of_gpu+0x118/0x220 [amdgpu]
   kfd_ioctl+0x222/0x400 [amdgpu]
   ? kfd_dev_is_large_bar+0x90/0x90 [amdgpu]
   __x64_sys_ioctl+0x8e/0xd0
   ? __context_tracking_exit+0x52/0x90
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x7f97f264d317
Code: b3 66 90 48 8b 05 71 4b 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff
ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d
01 f0 ff ff 73 01 c3 48 8b 0d 41 4b 2d 00 f7 d8 64 89 01 48
RSP: 002b:7ffdb402c338 EFLAGS: 0246 ORIG_RAX: 0010
RAX: ffda RBX: 7f97f3cc63a0 RCX: 7f97f264d317
RDX: 7ffdb402c380 RSI: c0284b16 RDI: 0003
RBP: 7ffdb402c380 R08: 7ffdb402c428 R09: c404
R10: c404 R11: 0246 R12: c0284b16
R13: 0003 R14: 7f97f3cc63a0 R15: 7f883620

Signed-off-by: Guchun Chen 

Yeah I had this one on my TODO list as well.

For now the patch is Acked-by: Christian König , but 
I'm not 100% sure if this is the right fix.

Please keep an eye open if anybody complains about issues with this patch, if 
yes we need to get back to the drawing board.

Christian.


---
   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e00263bcc88b..722efd86718e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -867,7 +867,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device 
*bdev,
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
   
   	/* double check that we don't free the table twice */

-   if (!ttm->sg->sgl)
+   if (!ttm->sg || !ttm->sg->sgl)
return;
   
   	/* unmap the pages mapped to the device */


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/2] drm/amdgpu: fix NULL pointer dereference

2021-03-30 Thread Chen, Guchun
[AMD Public Use]

Thanks Christian, I will put laser focus on this patch after merging it.

I notice the same logic in radeon code radeon_ttm_tt_unpin_userptr. Shall I 
create another patch to fix it as well?

Regards,
Guchun

-Original Message-
From: Christian König  
Sent: Tuesday, March 30, 2021 6:39 PM
To: Chen, Guchun ; amd-gfx@lists.freedesktop.org; Koenig, 
Christian ; Zhang, Hawking 
Subject: Re: [PATCH 1/2] drm/amdgpu: fix NULL pointer dereference

Am 30.03.21 um 12:02 schrieb Guchun Chen:
> ttm->sg needs to be checked before accessing its child member.
>
> Call Trace:
>   amdgpu_ttm_backend_destroy+0x12/0x70 [amdgpu]
>   ttm_bo_cleanup_memtype_use+0x3a/0x60 [ttm]
>   ttm_bo_release+0x17d/0x300 [ttm]
>   amdgpu_bo_unref+0x1a/0x30 [amdgpu]
>   amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x78b/0x8b0 [amdgpu]
>   kfd_ioctl_alloc_memory_of_gpu+0x118/0x220 [amdgpu]
>   kfd_ioctl+0x222/0x400 [amdgpu]
>   ? kfd_dev_is_large_bar+0x90/0x90 [amdgpu]
>   __x64_sys_ioctl+0x8e/0xd0
>   ? __context_tracking_exit+0x52/0x90
>   do_syscall_64+0x33/0x80
>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
> RIP: 0033:0x7f97f264d317
> Code: b3 66 90 48 8b 05 71 4b 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff 
> ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 
> 01 f0 ff ff 73 01 c3 48 8b 0d 41 4b 2d 00 f7 d8 64 89 01 48
> RSP: 002b:7ffdb402c338 EFLAGS: 0246 ORIG_RAX: 0010
> RAX: ffda RBX: 7f97f3cc63a0 RCX: 7f97f264d317
> RDX: 7ffdb402c380 RSI: c0284b16 RDI: 0003
> RBP: 7ffdb402c380 R08: 7ffdb402c428 R09: c404
> R10: c404 R11: 0246 R12: c0284b16
> R13: 0003 R14: 7f97f3cc63a0 R15: 7f883620
>
> Signed-off-by: Guchun Chen 

Yeah I had this one on my TODO list as well.

For now the patch is Acked-by: Christian König , but 
I'm not 100% sure if this is the right fix.

Please keep an eye open if anybody complains about issues with this patch, if 
yes we need to get back to the drawing board.

Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index e00263bcc88b..722efd86718e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -867,7 +867,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device 
> *bdev,
>   DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
>   
>   /* double check that we don't free the table twice */
> - if (!ttm->sg->sgl)
> + if (!ttm->sg || !ttm->sg->sgl)
>   return;
>   
>   /* unmap the pages mapped to the device */
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: fix compiler warning

2021-03-30 Thread Christian König

Am 30.03.21 um 15:10 schrieb Chen, Guchun:

[AMD Public Use]

Inline comments after yours'.

Regards,
Guchun

-Original Message-
From: Koenig, Christian 
Sent: Tuesday, March 30, 2021 6:40 PM
To: Chen, Guchun ; amd-gfx@lists.freedesktop.org; Zhang, Hawking 

Subject: Re: [PATCH 2/2] drm/amdgpu: fix compiler warning

Am 30.03.21 um 12:02 schrieb Guchun Chen:

warning: ISO C90 forbids mixed declarations and code 
[-Wdeclaration-after-statement]
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);

Well there seems to be some kind of bug in the compiler if it complains about 
the code below.
[Guchun]From linux coding style's perspective, we shall put the declarations 
together, separated from code by one blank line, right?


Correct, yes. I don't mind blank lines when you have for example some 
consts, but others sometimes complain about that.





Signed-off-by: Guchun Chen 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 --
   1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 722efd86718e..2a6fc0556386 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -824,7 +824,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device 
*bdev,
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
int r;
-

Better have variable like "r" and "i" declared last.
[Guchun]Will send v2 to address this if you don't have objection to this patch.


Thanks,
Christian.



Christian.


int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -861,7 +860,6 @@ static 
void
amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
   {
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
-
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amdgpu: fix compiler warning

2021-03-30 Thread Chen, Guchun
[AMD Public Use]

Inline comments after yours'.

Regards,
Guchun

-Original Message-
From: Koenig, Christian  
Sent: Tuesday, March 30, 2021 6:40 PM
To: Chen, Guchun ; amd-gfx@lists.freedesktop.org; Zhang, 
Hawking 
Subject: Re: [PATCH 2/2] drm/amdgpu: fix compiler warning

Am 30.03.21 um 12:02 schrieb Guchun Chen:
> warning: ISO C90 forbids mixed declarations and code 
> [-Wdeclaration-after-statement]
>int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);

Well there seems to be some kind of bug in the compiler if it complains about 
the code below.
[Guchun]From linux coding style's perspective, we shall put the declarations 
together, separated from code by one blank line, right?

>
> Signed-off-by: Guchun Chen 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 --
>   1 file changed, 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 722efd86718e..2a6fc0556386 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -824,7 +824,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device 
> *bdev,
>   struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
>   struct amdgpu_ttm_tt *gtt = (void *)ttm;
>   int r;
> -

Better have variable like "r" and "i" declared last.
[Guchun]Will send v2 to address this if you don't have objection to this patch.

Christian.

>   int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
>   enum dma_data_direction direction = write ?
>   DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -861,7 +860,6 @@ static 
> void 
> amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
>   {
>   struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
>   struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -
>   int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
>   enum dma_data_direction direction = write ?
>   DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix an underflow on non-4KB-page systems

2021-03-30 Thread Christian König

Am 30.03.21 um 15:00 schrieb Dan Horák:

On Tue, 30 Mar 2021 14:55:01 +0200
Christian König  wrote:


Am 30.03.21 um 14:04 schrieb Xi Ruoyao:

On 2021-03-30 03:40 +0800, Xi Ruoyao wrote:

On 2021-03-29 21:36 +0200, Christian König wrote:

Am 29.03.21 um 21:27 schrieb Xi Ruoyao:

Hi Christian,

I don't think there is any constraint implemented to ensure `num_entries %
AMDGPU_GPU_PAGES_IN_CPU_PAGE == 0`.  For example, in `amdgpu_vm_bo_map()`:

   /* validate the parameters */
   if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK
   size == 0 || size & AMDGPU_GPU_PAGE_MASK)
   return -EINVAL;

/* snip */

   saddr /= AMDGPU_GPU_PAGE_SIZE;
   eaddr /= AMDGPU_GPU_PAGE_SIZE;

/* snip */

   mapping->start = saddr;
   mapping->last = eaddr;


If we really want to ensure (mapping->last - mapping->start + 1) %
AMDGPU_GPU_PAGES_IN_CPU_PAGE == 0, then we should replace
"AMDGPU_GPU_PAGE_MASK"
in "validate the parameters" with "PAGE_MASK".

It should be "~PAGE_MASK", "PAGE_MASK" has an opposite convention of
"AMDGPU_GPU_PAGE_MASK" :(.


Yeah, good point.


I tried it and it broke userspace: Xorg startup fails with EINVAL with
this
change.

Well in theory it is possible that we always fill the GPUVM on a 4k
basis while the native page size of the CPU is larger. Let me double
check the code.

On my platform, there are two issues both causing the VM corruption.  One is
fixed by agd5f/linux@fe001e7.

What is fe001e7? A commit id? If yes then that is to short and I can't
find it.

it's a gitlab shortcut for
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.freedesktop.org%2Fagd5f%2Flinux%2F-%2Fcommit%2Ffe001e70a55d0378328612be1fdc3abfc68b9cccdata=04%7C01%7Cchristian.koenig%40amd.com%7Cd16d123aaa01420ebd0808d8f37bbf2f%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637527060812278536%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000sdata=5rFVLxSRnfHUGjhoiqx1e6SeROqbg4ZPef%2BxEvgv%2BTg%3Dreserved=0


Ah! Yes I would expect that this patch is fixing something in this use case.

Thanks,
Christian.




Dan

Another is in Mesa from userspace:  it uses
`dev_info->gart_page_size` as the alignment, but the kernel AMDGPU driver
expects it to use `dev_info->virtual_address_alignment`.

Mhm, looking at the kernel code I would rather say Mesa is correct and
the kernel driver is broken.

The gart_page_size is limited by the CPU page size, but the
virtual_address_alignment isn't.


If we can make the change to fill the GPUVM on a 4k basis, we can fix this issue
and make virtual_address_alignment = 4K.  Otherwise, we should fortify the
parameter validation, changing "AMDGPU_GPU_PAGE_MASK" to "~PAGE_MASK".  Then the
userspace will just get an EINVAL, instead of a slient VM corruption.  And
someone should tell Mesa developers to fix the code in this case.

I rather see this as a kernel bug. Can you test if this code fragment
fixes your issue:

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 64beb3399604..e1260b517e1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -780,7 +780,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void
*data, struct drm_file *filp)
      }
      dev_info->virtual_address_alignment =
max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
      dev_info->pte_fragment_size = (1 <<
adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
-   dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE;
+   dev_info->gart_page_size =
dev_info->virtual_address_alignment;
      dev_info->cu_active_number = adev->gfx.cu_info.number;
      dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
      dev_info->ce_ram_size = adev->gfx.ce_ram_size;


Thanks,
Christian.


--
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix an underflow on non-4KB-page systems

2021-03-30 Thread Christian König

Am 30.03.21 um 14:04 schrieb Xi Ruoyao:

On 2021-03-30 03:40 +0800, Xi Ruoyao wrote:

On 2021-03-29 21:36 +0200, Christian König wrote:

Am 29.03.21 um 21:27 schrieb Xi Ruoyao:

Hi Christian,

I don't think there is any constraint implemented to ensure `num_entries %
AMDGPU_GPU_PAGES_IN_CPU_PAGE == 0`.  For example, in `amdgpu_vm_bo_map()`:

  /* validate the parameters */
  if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK
  size == 0 || size & AMDGPU_GPU_PAGE_MASK)
  return -EINVAL;

/* snip */

  saddr /= AMDGPU_GPU_PAGE_SIZE;
  eaddr /= AMDGPU_GPU_PAGE_SIZE;

/* snip */

  mapping->start = saddr;
  mapping->last = eaddr;


If we really want to ensure (mapping->last - mapping->start + 1) %
AMDGPU_GPU_PAGES_IN_CPU_PAGE == 0, then we should replace
"AMDGPU_GPU_PAGE_MASK"
in "validate the parameters" with "PAGE_MASK".

It should be "~PAGE_MASK", "PAGE_MASK" has an opposite convention of
"AMDGPU_GPU_PAGE_MASK" :(.


Yeah, good point.


I tried it and it broke userspace: Xorg startup fails with EINVAL with
this
change.

Well in theory it is possible that we always fill the GPUVM on a 4k
basis while the native page size of the CPU is larger. Let me double
check the code.

On my platform, there are two issues both causing the VM corruption.  One is
fixed by agd5f/linux@fe001e7.


What is fe001e7? A commit id? If yes then that is to short and I can't 
find it.



   Another is in Mesa from userspace:  it uses
`dev_info->gart_page_size` as the alignment, but the kernel AMDGPU driver
expects it to use `dev_info->virtual_address_alignment`.


Mhm, looking at the kernel code I would rather say Mesa is correct and 
the kernel driver is broken.


The gart_page_size is limited by the CPU page size, but the 
virtual_address_alignment isn't.



If we can make the change to fill the GPUVM on a 4k basis, we can fix this issue
and make virtual_address_alignment = 4K.  Otherwise, we should fortify the
parameter validation, changing "AMDGPU_GPU_PAGE_MASK" to "~PAGE_MASK".  Then the
userspace will just get an EINVAL, instead of a slient VM corruption.  And
someone should tell Mesa developers to fix the code in this case.


I rather see this as a kernel bug. Can you test if this code fragment 
fixes your issue:


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index 64beb3399604..e1260b517e1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -780,7 +780,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file *filp)

    }
    dev_info->virtual_address_alignment = 
max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
    dev_info->pte_fragment_size = (1 << 
adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;

-   dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE;
+   dev_info->gart_page_size = 
dev_info->virtual_address_alignment;

    dev_info->cu_active_number = adev->gfx.cu_info.number;
    dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
    dev_info->ce_ram_size = adev->gfx.ce_ram_size;


Thanks,
Christian.


--
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: check whether s2idle is enabled to determine s0ix

2021-03-30 Thread Lazar, Lijo
[AMD Public Use]

Maybe just check the global (pm_suspend_target_state == PM_SUSPEND_TO_IDLE) and 
decide whether it's going to s0ix.

Thanks,
Lijo

-Original Message-
From: amd-gfx  On Behalf Of Liang, Prike
Sent: Tuesday, March 30, 2021 4:32 PM
To: Deucher, Alexander ; 
amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: RE: [PATCH] drm/amdgpu: check whether s2idle is enabled to determine 
s0ix

[AMD Public Use]

[AMD Public Use]

This issue should occur on the hybrid s0i3 system which forces mem_sleep to 
deep level on the s0i3 enabled platform. We may need use the 
acpi_target_system_state() to identify the system target sleep level and then 
handle AMDGPU Sx[0..5] suspend/resume respectively.

Reviewed-by: Prike Liang 

Thanks,
Prike
> -Original Message-
> From: amd-gfx  On Behalf Of 
> Alex Deucher
> Sent: Tuesday, March 30, 2021 1:41 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: [PATCH] drm/amdgpu: check whether s2idle is enabled to 
> determine s0ix
>
> For legacy S3, we need to use different handling in the driver.
> Suggested by Heiko Przybyl.
>
> Bug:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitl
> a
> b.freedesktop.org%2Fdrm%2Famd%2F-
> %2Fissues%2F1553data=04%7C01%7CPrike.Liang%40amd.com%7Ce0a
> 7dd7c6958449814c508d8f2d9d1c1%7C3dd8961fe4884e608e11a82d994e183d
> %7C0%7C0%7C637526364636883173%7CUnknown%7CTWFpbGZsb3d8eyJWI
> joiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1
> 000sdata=SHtEYsQz5%2FgkSGXE%2B9OFzN4yaRgkw6A2xs9IxVy3e5Q%
> 3Dreserved=0
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> index 2e9b16fb3fcd..b64c002b9aa3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> @@ -26,6 +26,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>
> @@ -906,7 +907,7 @@ bool amdgpu_acpi_is_s0ix_supported(struct
> amdgpu_device *adev)  #if defined(CONFIG_AMD_PMC) ||
> defined(CONFIG_AMD_PMC_MODULE)
>  if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {  if (adev->flags 
> & AMD_IS_APU) -return true;
> +return pm_suspend_default_s2idle();
>  }
>  #endif
>  return false;
> --
> 2.30.2
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
> s.f
> reedesktop.org%2Fmailman%2Flistinfo%2Famd-
> gfxdata=04%7C01%7CPrike.Liang%40amd.com%7Ce0a7dd7c69584498
> 14c508d8f2d9d1c1%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C
> 637526364636883173%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwM
> DAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdat
> a=BZF35Ev5P7Guzwd3HYUGksztE30D4Cc9mhWk%2FjbaTyA%3Dreserv
> ed=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7Clijo.lazar%40amd.com%7C4287492ad9c8471d393608d8f36b354d%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637526989080007698%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=g5VKbmTjSdMssg0OVEB9fYbDh8cn%2F5LJx%2FzKGSi7%2BLE%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: check whether s2idle is enabled to determine s0ix

2021-03-30 Thread Liang, Prike
[AMD Public Use]

This issue should occur on the hybrid s0i3 system which forces mem_sleep to 
deep level on the s0i3 enabled platform. We may need use the 
acpi_target_system_state() to identify the system target sleep level and then 
handle AMDGPU Sx[0..5] suspend/resume respectively.

Reviewed-by: Prike Liang 

Thanks,
Prike
> -Original Message-
> From: amd-gfx  On Behalf Of Alex
> Deucher
> Sent: Tuesday, March 30, 2021 1:41 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: [PATCH] drm/amdgpu: check whether s2idle is enabled to determine
> s0ix
>
> For legacy S3, we need to use different handling in the driver.
> Suggested by Heiko Przybyl.
>
> Bug:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitla
> b.freedesktop.org%2Fdrm%2Famd%2F-
> %2Fissues%2F1553data=04%7C01%7CPrike.Liang%40amd.com%7Ce0a
> 7dd7c6958449814c508d8f2d9d1c1%7C3dd8961fe4884e608e11a82d994e183d
> %7C0%7C0%7C637526364636883173%7CUnknown%7CTWFpbGZsb3d8eyJWI
> joiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1
> 000sdata=SHtEYsQz5%2FgkSGXE%2B9OFzN4yaRgkw6A2xs9IxVy3e5Q%
> 3Dreserved=0
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> index 2e9b16fb3fcd..b64c002b9aa3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> @@ -26,6 +26,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>
> @@ -906,7 +907,7 @@ bool amdgpu_acpi_is_s0ix_supported(struct
> amdgpu_device *adev)  #if defined(CONFIG_AMD_PMC) ||
> defined(CONFIG_AMD_PMC_MODULE)
>  if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
>  if (adev->flags & AMD_IS_APU)
> -return true;
> +return pm_suspend_default_s2idle();
>  }
>  #endif
>  return false;
> --
> 2.30.2
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.f
> reedesktop.org%2Fmailman%2Flistinfo%2Famd-
> gfxdata=04%7C01%7CPrike.Liang%40amd.com%7Ce0a7dd7c69584498
> 14c508d8f2d9d1c1%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C
> 637526364636883173%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwM
> DAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdat
> a=BZF35Ev5P7Guzwd3HYUGksztE30D4Cc9mhWk%2FjbaTyA%3Dreserv
> ed=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: fix compiler warning

2021-03-30 Thread Christian König

Am 30.03.21 um 12:02 schrieb Guchun Chen:

warning: ISO C90 forbids mixed declarations and code 
[-Wdeclaration-after-statement]
   int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);


Well there seems to be some kind of bug in the compiler if it complains 
about the code below.


Signed-off-by: Guchun Chen 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 --
  1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 722efd86718e..2a6fc0556386 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -824,7 +824,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device 
*bdev,
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
int r;
-


Better have variable like "r" and "i" declared last.

Christian.


int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -861,7 +860,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device 
*bdev,
  {
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
-
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: fix NULL pointer dereference

2021-03-30 Thread Christian König

Am 30.03.21 um 12:02 schrieb Guchun Chen:

ttm->sg needs to be checked before accessing its child member.

Call Trace:
  amdgpu_ttm_backend_destroy+0x12/0x70 [amdgpu]
  ttm_bo_cleanup_memtype_use+0x3a/0x60 [ttm]
  ttm_bo_release+0x17d/0x300 [ttm]
  amdgpu_bo_unref+0x1a/0x30 [amdgpu]
  amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x78b/0x8b0 [amdgpu]
  kfd_ioctl_alloc_memory_of_gpu+0x118/0x220 [amdgpu]
  kfd_ioctl+0x222/0x400 [amdgpu]
  ? kfd_dev_is_large_bar+0x90/0x90 [amdgpu]
  __x64_sys_ioctl+0x8e/0xd0
  ? __context_tracking_exit+0x52/0x90
  do_syscall_64+0x33/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x7f97f264d317
Code: b3 66 90 48 8b 05 71 4b 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 
2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 
8b 0d 41 4b 2d 00 f7 d8 64 89 01 48
RSP: 002b:7ffdb402c338 EFLAGS: 0246 ORIG_RAX: 0010
RAX: ffda RBX: 7f97f3cc63a0 RCX: 7f97f264d317
RDX: 7ffdb402c380 RSI: c0284b16 RDI: 0003
RBP: 7ffdb402c380 R08: 7ffdb402c428 R09: c404
R10: c404 R11: 0246 R12: c0284b16
R13: 0003 R14: 7f97f3cc63a0 R15: 7f883620

Signed-off-by: Guchun Chen 


Yeah I had this one on my TODO list as well.

For now the patch is Acked-by: Christian König 
, but I'm not 100% sure if this is the right fix.


Please keep an eye open if anybody complains about issues with this 
patch, if yes we need to get back to the drawing board.


Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e00263bcc88b..722efd86718e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -867,7 +867,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device 
*bdev,
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
  
  	/* double check that we don't free the table twice */

-   if (!ttm->sg->sgl)
+   if (!ttm->sg || !ttm->sg->sgl)
return;
  
  	/* unmap the pages mapped to the device */


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: fix compiler warning

2021-03-30 Thread Guchun Chen
warning: ISO C90 forbids mixed declarations and code 
[-Wdeclaration-after-statement]
  int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 722efd86718e..2a6fc0556386 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -824,7 +824,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device 
*bdev,
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
int r;
-
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -861,7 +860,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device 
*bdev,
 {
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
-
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: fix NULL pointer dereference

2021-03-30 Thread Guchun Chen
ttm->sg needs to be checked before accessing its child member.

Call Trace:
 amdgpu_ttm_backend_destroy+0x12/0x70 [amdgpu]
 ttm_bo_cleanup_memtype_use+0x3a/0x60 [ttm]
 ttm_bo_release+0x17d/0x300 [ttm]
 amdgpu_bo_unref+0x1a/0x30 [amdgpu]
 amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x78b/0x8b0 [amdgpu]
 kfd_ioctl_alloc_memory_of_gpu+0x118/0x220 [amdgpu]
 kfd_ioctl+0x222/0x400 [amdgpu]
 ? kfd_dev_is_large_bar+0x90/0x90 [amdgpu]
 __x64_sys_ioctl+0x8e/0xd0
 ? __context_tracking_exit+0x52/0x90
 do_syscall_64+0x33/0x80
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x7f97f264d317
Code: b3 66 90 48 8b 05 71 4b 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff 
c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 
c3 48 8b 0d 41 4b 2d 00 f7 d8 64 89 01 48
RSP: 002b:7ffdb402c338 EFLAGS: 0246 ORIG_RAX: 0010
RAX: ffda RBX: 7f97f3cc63a0 RCX: 7f97f264d317
RDX: 7ffdb402c380 RSI: c0284b16 RDI: 0003
RBP: 7ffdb402c380 R08: 7ffdb402c428 R09: c404
R10: c404 R11: 0246 R12: c0284b16
R13: 0003 R14: 7f97f3cc63a0 R15: 7f883620

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e00263bcc88b..722efd86718e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -867,7 +867,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device 
*bdev,
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
 
/* double check that we don't free the table twice */
-   if (!ttm->sg->sgl)
+   if (!ttm->sg || !ttm->sg->sgl)
return;
 
/* unmap the pages mapped to the device */
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Toggle msix after FLR for sriov

2021-03-30 Thread Emily Deng
From: "Emily.Deng" 

For vf assigned to guest VM, after FLR, the msix table will be reset.
As the flr is done on host driver. The qemu and vfio driver don't know
this, and the msix is still enable from qemu and vfio driver side.
So if want to  re-setup the msix table, first need to disable and
re-enable the msix from guest VM side or the qemu will do nothing as
it thought the msix is already enabled.

v2:
Change name with amdgpu_irq prefix, remove #ifdef.

Signed-off-by: Emily.Deng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 03412543427a..3045f52e613d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -277,6 +277,17 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
return true;
 }
 
+static void amdgpu_irq_restore_msix(struct amdgpu_device *adev)
+{
+   u16 ctrl;
+
+   pci_read_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, 
);
+   ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
+   pci_write_config_word(adev->pdev, adev->pdev->msix_cap + 
PCI_MSIX_FLAGS, ctrl);
+   ctrl |= PCI_MSIX_FLAGS_ENABLE;
+   pci_write_config_word(adev->pdev, adev->pdev->msix_cap + 
PCI_MSIX_FLAGS, ctrl);
+}
+
 /**
  * amdgpu_irq_init - initialize interrupt handling
  *
@@ -558,6 +569,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct 
amdgpu_device *adev)
 {
int i, j, k;
 
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_irq_restore_msix(adev);
+
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov

2021-03-30 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

>-Original Message-
>From: Das, Nirmoy 
>Sent: Tuesday, March 30, 2021 5:34 PM
>To: Deng, Emily ; Das, Nirmoy
>; amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov
>
>
>On 3/30/21 11:29 AM, Deng, Emily wrote:
>> [AMD Official Use Only - Internal Distribution Only]
>>
>>> -Original Message-
>>> From: Das, Nirmoy 
>>> Sent: Tuesday, March 30, 2021 4:59 PM
>>> To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov
>>>
>>>
>>> On 3/30/21 10:14 AM, Emily Deng wrote:
 From: "Emily.Deng" 

 After FLR, the msix will be cleared, so need to toggle it for sriov.

 v2:
 Change name with amdgpu_irq prefix, remove #ifdef.

 Signed-off-by: Emily.Deng 
 ---
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 14 ++
1 file changed, 14 insertions(+)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
 index 03412543427a..3045f52e613d 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
 @@ -277,6 +277,17 @@ static bool amdgpu_msi_ok(struct
>amdgpu_device
>>> *adev)
return true;
}

 +static void amdgpu_irq_restore_msix(struct amdgpu_device *adev) {
 +u16 ctrl;
 +
 +pci_read_config_word(adev->pdev, adev->pdev->msix_cap +
>>> PCI_MSIX_FLAGS, );
 +ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
 +pci_write_config_word(adev->pdev, adev->pdev->msix_cap +
>>> PCI_MSIX_FLAGS, ctrl);
 +ctrl |= PCI_MSIX_FLAGS_ENABLE;
 +pci_write_config_word(adev->pdev, adev->pdev->msix_cap +
 +PCI_MSIX_FLAGS, ctrl);
>>>
>>> Why write 1st clear and then set the msix flag if we know that msix
>>> is already cleared
>> For vf assigned to guest VM, after FLR, the msix table will be reset.
>> As the flr is done on host driver. The qemu and vfio driver don't know
>> this, and the msix is still enable from qemu and vfio driver side. So if 
>> want to
>re-setup the msix table, first need to disable and re-enable the msix from
>guest VM side or the qemu will do nothing as it thought the msix is already
>enabled.
>
>
>Thanks for the detailed explanation, Emily. Please add a comment so that we
>know/remember why we are doing this.
Ok, will do this. Thanks.
>
>
>Nirmoy
>
>
>>>
>>>
 +}
 +
/**
 * amdgpu_irq_init - initialize interrupt handling
 *
 @@ -558,6 +569,9 @@ void
>amdgpu_irq_gpu_reset_resume_helper(struct
>>> amdgpu_device *adev)
{
int i, j, k;

 +if (amdgpu_sriov_vf(adev))
 +amdgpu_irq_restore_msix(adev);
>>>
>>> Is it possible to load amdgpu on guest without msix ? If so then we need
>>> to probe if msix is enabled.
It is decided by host driver, not guest driver.
>>>
>>>
>>> Nirmoy
>>>
>>>
 +
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov

2021-03-30 Thread Nirmoy



On 3/30/21 11:29 AM, Deng, Emily wrote:

[AMD Official Use Only - Internal Distribution Only]


-Original Message-
From: Das, Nirmoy 
Sent: Tuesday, March 30, 2021 4:59 PM
To: Deng, Emily ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov


On 3/30/21 10:14 AM, Emily Deng wrote:

From: "Emily.Deng" 

After FLR, the msix will be cleared, so need to toggle it for sriov.

v2:
Change name with amdgpu_irq prefix, remove #ifdef.

Signed-off-by: Emily.Deng 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 14 ++
   1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 03412543427a..3045f52e613d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -277,6 +277,17 @@ static bool amdgpu_msi_ok(struct amdgpu_device

*adev)

   return true;
   }

+static void amdgpu_irq_restore_msix(struct amdgpu_device *adev) {
+u16 ctrl;
+
+pci_read_config_word(adev->pdev, adev->pdev->msix_cap +

PCI_MSIX_FLAGS, );

+ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
+pci_write_config_word(adev->pdev, adev->pdev->msix_cap +

PCI_MSIX_FLAGS, ctrl);

+ctrl |= PCI_MSIX_FLAGS_ENABLE;
+pci_write_config_word(adev->pdev, adev->pdev->msix_cap +
+PCI_MSIX_FLAGS, ctrl);


Why write 1st clear and then set the msix flag if we know that msix is already
cleared

For vf assigned to guest VM, after FLR, the msix table will be reset. As the 
flr is done on host driver. The qemu and vfio driver don't know
this, and the msix is still enable from qemu and vfio driver side. So if want 
to  re-setup the msix table, first need to disable and re-enable the msix from 
guest VM side or the qemu will do nothing
as it thought the msix is already enabled.



Thanks for the detailed explanation, Emily. Please add a comment so that 
we know/remember why we are doing this.



Nirmoy






+}
+
   /**
* amdgpu_irq_init - initialize interrupt handling
*
@@ -558,6 +569,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct

amdgpu_device *adev)

   {
   int i, j, k;

+if (amdgpu_sriov_vf(adev))
+amdgpu_irq_restore_msix(adev);


Is it possible to load amdgpu on guest without msix ? If so then we need
to probe if msix is enabled.


Nirmoy



+
   for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
   if (!adev->irq.client[i].sources)
   continue;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov

2021-03-30 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

>-Original Message-
>From: Das, Nirmoy 
>Sent: Tuesday, March 30, 2021 4:59 PM
>To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov
>
>
>On 3/30/21 10:14 AM, Emily Deng wrote:
>> From: "Emily.Deng" 
>>
>> After FLR, the msix will be cleared, so need to toggle it for sriov.
>>
>> v2:
>> Change name with amdgpu_irq prefix, remove #ifdef.
>>
>> Signed-off-by: Emily.Deng 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 14 ++
>>   1 file changed, 14 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>> index 03412543427a..3045f52e613d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>> @@ -277,6 +277,17 @@ static bool amdgpu_msi_ok(struct amdgpu_device
>*adev)
>>   return true;
>>   }
>>
>> +static void amdgpu_irq_restore_msix(struct amdgpu_device *adev) {
>> +u16 ctrl;
>> +
>> +pci_read_config_word(adev->pdev, adev->pdev->msix_cap +
>PCI_MSIX_FLAGS, );
>> +ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
>> +pci_write_config_word(adev->pdev, adev->pdev->msix_cap +
>PCI_MSIX_FLAGS, ctrl);
>> +ctrl |= PCI_MSIX_FLAGS_ENABLE;
>> +pci_write_config_word(adev->pdev, adev->pdev->msix_cap +
>> +PCI_MSIX_FLAGS, ctrl);
>
>
>Why write 1st clear and then set the msix flag if we know that msix is already
>cleared
For vf assigned to guest VM, after FLR, the msix table will be reset. As the 
flr is done on host driver. The qemu and vfio driver don't know
this, and the msix is still enable from qemu and vfio driver side. So if want 
to  re-setup the msix table, first need to disable and re-enable the msix from 
guest VM side or the qemu will do nothing
as it thought the msix is already enabled.
>
>
>
>> +}
>> +
>>   /**
>>* amdgpu_irq_init - initialize interrupt handling
>>*
>> @@ -558,6 +569,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct
>amdgpu_device *adev)
>>   {
>>   int i, j, k;
>>
>> +if (amdgpu_sriov_vf(adev))
>> +amdgpu_irq_restore_msix(adev);
>
>
>Is it possible to load amdgpu on guest without msix ? If so then we need
>to probe if msix is enabled.
>
>
>Nirmoy
>
>
>> +
>>   for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
>>   if (!adev->irq.client[i].sources)
>>   continue;
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

2021-03-30 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

Hi Christian,
 Ok, will investigate this more for memory leak. But even I fixed this 
memory leak this time, it couldn't promise anymore memory leak in future. 
Memory leak shouldn't cause kernel crush, and couldn't
be used anymore.

Best wishes
Emily Deng



>-Original Message-
>From: Christian König 
>Sent: Tuesday, March 30, 2021 4:38 PM
>To: Deng, Emily ; Chen, Jiansong (Simon)
>; amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH 6/6] drm/amdgpu: Fix driver unload issue
>
>Hi Emily,
>
>as I said add a WARN_ON() and look at the backtrace.
>
>It could be that the backtrace then just shows the general cleanup functions,
>but it is at least a start.
>
>On the other hand if you only see this sometimes then we have some kind of
>race condition and need to dig deeper.
>
>Christian.
>
>Am 30.03.21 um 10:19 schrieb Deng, Emily:
>> [AMD Official Use Only - Internal Distribution Only]
>>
>> Hi Christian,
>>   Yes, I agree both with you. But the issue occurs randomly and in
>> unload driver and in fairly low rate. It is hard to debug where is the memory
>leak. Could you give some suggestion about how to debug this issue?
>>
>>
>> Best wishes
>> Emily Deng
>>
>>
>>
>>> -Original Message-
>>> From: Christian König 
>>> Sent: Tuesday, March 30, 2021 3:11 PM
>>> To: Deng, Emily ; Chen, Jiansong (Simon)
>>> ; amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH 6/6] drm/amdgpu: Fix driver unload issue
>>>
>>> Good morning,
>>>
>>> yes Jiansong is right that patch is really not a good idea.
>>>
>>> Moving buffers can indeed happen during shutdown while some memory
>is
>>> still referenced.
>>>
>>> Just ignoring the move is not the right approach, you need to find
>>> out why the memory is moved in the first place.
>>>
>>> You could add something like WARN_ON(adev->shutdown);
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 30.03.21 um 09:05 schrieb Deng, Emily:
 [AMD Official Use Only - Internal Distribution Only]

 Hi Jiansong,
It does happen,  maybe have the race condition?


 Best wishes
 Emily Deng



> -Original Message-
> From: Chen, Jiansong (Simon) 
> Sent: Tuesday, March 30, 2021 2:49 PM
> To: Deng, Emily ; amd-
>g...@lists.freedesktop.org
> Cc: Deng, Emily 
> Subject: RE: [PATCH 6/6] drm/amdgpu: Fix driver unload issue
>
> [AMD Official Use Only - Internal Distribution Only]
>
> I still wonder how the issue takes place? According to my humble
> knowledge in driver model, the reference count of the kobject for
> the device will not reach zero when there is still some device mem
> access, and shutdown should not happen.
>
> Regards,
> Jiansong
> -Original Message-
> From: amd-gfx  On Behalf Of
> Emily Deng
> Sent: Tuesday, March 30, 2021 12:42 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deng, Emily 
> Subject: [PATCH 6/6] drm/amdgpu: Fix driver unload issue
>
> During driver unloading, don't need to copy mem, or it will
> introduce some call trace, such as when sa_manager is freed, it
> will introduce warn call trace in amdgpu_sa_bo_new.
>
> Signed-off-by: Emily Deng 
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index e00263bcc88b..f0546a489e0d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -317,6 +317,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct
> amdgpu_device *adev,  struct dma_fence *fence = NULL;  int r = 0;
>
> +if (adev->shutdown)
> +return 0;
> +
> if (!adev->mman.buffer_funcs_enabled) {  DRM_ERROR("Trying to move
> memory with ring turned off.\n");  return -EINVAL;
> --
> 2.25.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fl
> is
> ts.fr
> eedesktop.org%2Fmailman%2Flistinfo%2Famd-
>
>>>
>gfxdata=04%7C01%7CJiansong.Chen%40amd.com%7C1b4c71d7b96247
>>>
>6a367508d8f3362f40%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7
>>>
>C637526761354532311%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAw
>>>
>MDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdat
>>>
>a=RxRnZW0fmwjKSGMN1nf6kIHRdAPVs9J5OBluDYhR6vQ%3Dreserved
> =0
 ___
 amd-gfx mailing list
 amd-gfx@lists.freedesktop.org
 https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fli
 st
 s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-
>>> gfxdata=04%7C01%7CEm
>>>
>ily.Deng%40amd.com%7Cffacb4715aff4ba4336808d8f34af62d%7C3dd8961fe4
>>> 884e
>>>

Re: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov

2021-03-30 Thread Nirmoy



On 3/30/21 10:14 AM, Emily Deng wrote:

From: "Emily.Deng" 

After FLR, the msix will be cleared, so need to toggle it for sriov.

v2:
Change name with amdgpu_irq prefix, remove #ifdef.

Signed-off-by: Emily.Deng 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 03412543427a..3045f52e613d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -277,6 +277,17 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
return true;
  }
  
+static void amdgpu_irq_restore_msix(struct amdgpu_device *adev)

+{
+   u16 ctrl;
+
+   pci_read_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, 
);
+   ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
+   pci_write_config_word(adev->pdev, adev->pdev->msix_cap + 
PCI_MSIX_FLAGS, ctrl);
+   ctrl |= PCI_MSIX_FLAGS_ENABLE;
+   pci_write_config_word(adev->pdev, adev->pdev->msix_cap + 
PCI_MSIX_FLAGS, ctrl);



Why write 1st clear and then set the msix flag if we know that msix is 
already cleared.





+}
+
  /**
   * amdgpu_irq_init - initialize interrupt handling
   *
@@ -558,6 +569,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct 
amdgpu_device *adev)
  {
int i, j, k;
  
+	if (amdgpu_sriov_vf(adev))

+   amdgpu_irq_restore_msix(adev);



Is it possible to load amdgpu on guest without msix ? If so then we need 
to probe if msix is enabled.



Nirmoy



+
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

2021-03-30 Thread Christian König

Hi Emily,

as I said add a WARN_ON() and look at the backtrace.

It could be that the backtrace then just shows the general cleanup 
functions, but it is at least a start.


On the other hand if you only see this sometimes then we have some kind 
of race condition and need to dig deeper.


Christian.

Am 30.03.21 um 10:19 schrieb Deng, Emily:

[AMD Official Use Only - Internal Distribution Only]

Hi Christian,
  Yes, I agree both with you. But the issue occurs randomly and in unload 
driver and in fairly low rate. It is hard to debug where is the memory leak. 
Could you give some suggestion about how
to debug this issue?


Best wishes
Emily Deng




-Original Message-
From: Christian König 
Sent: Tuesday, March 30, 2021 3:11 PM
To: Deng, Emily ; Chen, Jiansong (Simon)
; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

Good morning,

yes Jiansong is right that patch is really not a good idea.

Moving buffers can indeed happen during shutdown while some memory is
still referenced.

Just ignoring the move is not the right approach, you need to find out why the
memory is moved in the first place.

You could add something like WARN_ON(adev->shutdown);

Regards,
Christian.

Am 30.03.21 um 09:05 schrieb Deng, Emily:

[AMD Official Use Only - Internal Distribution Only]

Hi Jiansong,
   It does happen,  maybe have the race condition?


Best wishes
Emily Deng




-Original Message-
From: Chen, Jiansong (Simon) 
Sent: Tuesday, March 30, 2021 2:49 PM
To: Deng, Emily ; amd-gfx@lists.freedesktop.org
Cc: Deng, Emily 
Subject: RE: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

[AMD Official Use Only - Internal Distribution Only]

I still wonder how the issue takes place? According to my humble
knowledge in driver model, the reference count of the kobject for the
device will not reach zero when there is still some device mem
access, and shutdown should not happen.

Regards,
Jiansong
-Original Message-
From: amd-gfx  On Behalf Of
Emily Deng
Sent: Tuesday, March 30, 2021 12:42 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deng, Emily 
Subject: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

During driver unloading, don't need to copy mem, or it will introduce
some call trace, such as when sa_manager is freed, it will introduce
warn call trace in amdgpu_sa_bo_new.

Signed-off-by: Emily Deng 
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e00263bcc88b..f0546a489e0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -317,6 +317,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct
amdgpu_device *adev,  struct dma_fence *fence = NULL;  int r = 0;

+if (adev->shutdown)
+return 0;
+
if (!adev->mman.buffer_funcs_enabled) {  DRM_ERROR("Trying to move
memory with ring turned off.\n");  return -EINVAL;
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flis
ts.fr
eedesktop.org%2Fmailman%2Flistinfo%2Famd-


gfxdata=04%7C01%7CJiansong.Chen%40amd.com%7C1b4c71d7b96247
6a367508d8f3362f40%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7
C637526761354532311%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAw
MDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdat
a=RxRnZW0fmwjKSGMN1nf6kIHRdAPVs9J5OBluDYhR6vQ%3Dreserved

=0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-

gfxdata=04%7C01%7CEm
ily.Deng%40amd.com%7Cffacb4715aff4ba4336808d8f34af62d%7C3dd8961fe4
884e
608e11a82d994e183d%7C0%7C0%7C637526850578585302%7CUnknown%7CT
WFpbGZsb3
d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%
3D%7
C1000sdata=u26JPASmJOF5nkXFSJP89PiUUFehvzf%2B2qxQM%2FgT9Ek
%3D

;reserved=0


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

2021-03-30 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

Hi Christian,
 Yes, I agree both with you. But the issue occurs randomly and in unload 
driver and in fairly low rate. It is hard to debug where is the memory leak. 
Could you give some suggestion about how
to debug this issue?


Best wishes
Emily Deng



>-Original Message-
>From: Christian König 
>Sent: Tuesday, March 30, 2021 3:11 PM
>To: Deng, Emily ; Chen, Jiansong (Simon)
>; amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH 6/6] drm/amdgpu: Fix driver unload issue
>
>Good morning,
>
>yes Jiansong is right that patch is really not a good idea.
>
>Moving buffers can indeed happen during shutdown while some memory is
>still referenced.
>
>Just ignoring the move is not the right approach, you need to find out why the
>memory is moved in the first place.
>
>You could add something like WARN_ON(adev->shutdown);
>
>Regards,
>Christian.
>
>Am 30.03.21 um 09:05 schrieb Deng, Emily:
>> [AMD Official Use Only - Internal Distribution Only]
>>
>> Hi Jiansong,
>>   It does happen,  maybe have the race condition?
>>
>>
>> Best wishes
>> Emily Deng
>>
>>
>>
>>> -Original Message-
>>> From: Chen, Jiansong (Simon) 
>>> Sent: Tuesday, March 30, 2021 2:49 PM
>>> To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>>> Cc: Deng, Emily 
>>> Subject: RE: [PATCH 6/6] drm/amdgpu: Fix driver unload issue
>>>
>>> [AMD Official Use Only - Internal Distribution Only]
>>>
>>> I still wonder how the issue takes place? According to my humble
>>> knowledge in driver model, the reference count of the kobject for the
>>> device will not reach zero when there is still some device mem
>>> access, and shutdown should not happen.
>>>
>>> Regards,
>>> Jiansong
>>> -Original Message-
>>> From: amd-gfx  On Behalf Of
>>> Emily Deng
>>> Sent: Tuesday, March 30, 2021 12:42 PM
>>> To: amd-gfx@lists.freedesktop.org
>>> Cc: Deng, Emily 
>>> Subject: [PATCH 6/6] drm/amdgpu: Fix driver unload issue
>>>
>>> During driver unloading, don't need to copy mem, or it will introduce
>>> some call trace, such as when sa_manager is freed, it will introduce
>>> warn call trace in amdgpu_sa_bo_new.
>>>
>>> Signed-off-by: Emily Deng 
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
>>> 1 file changed, 3 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index e00263bcc88b..f0546a489e0d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -317,6 +317,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct
>>> amdgpu_device *adev,  struct dma_fence *fence = NULL;  int r = 0;
>>>
>>> +if (adev->shutdown)
>>> +return 0;
>>> +
>>> if (!adev->mman.buffer_funcs_enabled) {  DRM_ERROR("Trying to move
>>> memory with ring turned off.\n");  return -EINVAL;
>>> --
>>> 2.25.1
>>>
>>> ___
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flis
>>> ts.fr
>>> eedesktop.org%2Fmailman%2Flistinfo%2Famd-
>>>
>gfxdata=04%7C01%7CJiansong.Chen%40amd.com%7C1b4c71d7b96247
>>>
>6a367508d8f3362f40%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7
>>>
>C637526761354532311%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAw
>>>
>MDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdat
>>>
>a=RxRnZW0fmwjKSGMN1nf6kIHRdAPVs9J5OBluDYhR6vQ%3Dreserved
>>> =0
>> ___
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
>> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-
>gfxdata=04%7C01%7CEm
>>
>ily.Deng%40amd.com%7Cffacb4715aff4ba4336808d8f34af62d%7C3dd8961fe4
>884e
>>
>608e11a82d994e183d%7C0%7C0%7C637526850578585302%7CUnknown%7CT
>WFpbGZsb3
>>
>d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%
>3D%7
>>
>C1000sdata=u26JPASmJOF5nkXFSJP89PiUUFehvzf%2B2qxQM%2FgT9Ek
>%3D
>> ;reserved=0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Interlaced resolutions hang the desktop

2021-03-30 Thread Christian König

Hi Alberto,

well what hardware do you have?

Interlaced resolutions are not used any more on modern hardware, so they 
are not well tested.


Regards,
Christian.

Am 30.03.21 um 10:04 schrieb Alberto Salvia Novella:
The entire desktop hangs after some minutes when using the module 
"radeon" with an interlaced resolution.


Easier to trigger by playing a video on Firefox, at least on kwin_x11. 
Wayland didn't exhibit the problem.


Other display drivers, from different computers I have tried, didn't 
allow those interlaced resolutions all together. It seems they know 
there will be problems.


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Toggle msix after FLR for sriov

2021-03-30 Thread Emily Deng
From: "Emily.Deng" 

After FLR, the msix will be cleared, so need to toggle it for sriov.

v2:
Change name with amdgpu_irq prefix, remove #ifdef.

Signed-off-by: Emily.Deng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 03412543427a..3045f52e613d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -277,6 +277,17 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
return true;
 }
 
+static void amdgpu_irq_restore_msix(struct amdgpu_device *adev)
+{
+   u16 ctrl;
+
+   pci_read_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, 
);
+   ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
+   pci_write_config_word(adev->pdev, adev->pdev->msix_cap + 
PCI_MSIX_FLAGS, ctrl);
+   ctrl |= PCI_MSIX_FLAGS_ENABLE;
+   pci_write_config_word(adev->pdev, adev->pdev->msix_cap + 
PCI_MSIX_FLAGS, ctrl);
+}
+
 /**
  * amdgpu_irq_init - initialize interrupt handling
  *
@@ -558,6 +569,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct 
amdgpu_device *adev)
 {
int i, j, k;
 
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_irq_restore_msix(adev);
+
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 3/6] drm/amdgpu: Restore msix after FLR

2021-03-30 Thread Deng, Emily
Hi Guchun,
Ok, will make it to static function.

>-Original Message-
>From: Chen, Guchun 
>Sent: Tuesday, March 30, 2021 1:38 PM
>To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily 
>Subject: RE: [PATCH 3/6] drm/amdgpu: Restore msix after FLR
>
>[AMD Public Use]
>
>amdgpu_irq_restore_msix should be one static function?
>
>Regards,
>Guchun
>
>-Original Message-
>From: amd-gfx  On Behalf Of Emily
>Deng
>Sent: Tuesday, March 30, 2021 12:42 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily 
>Subject: [PATCH 3/6] drm/amdgpu: Restore msix after FLR
>
>From: "Emily.Deng" 
>
>After FLR, the msix will be cleared, so need to re-enable it.
>
>v2:
>Change name with amdgpu_irq prefix, remove #ifdef.
>
>Signed-off-by: Emily.Deng 
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 12 
> 1 file changed, 12 insertions(+)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>index 03412543427a..8936589bd7f9 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>@@ -277,6 +277,17 @@ static bool amdgpu_msi_ok(struct amdgpu_device
>*adev)
>   return true;
> }
>
>+void amdgpu_irq_restore_msix(struct amdgpu_device *adev) {
>+  u16 ctrl;
>+
>+  pci_read_config_word(adev->pdev, adev->pdev->msix_cap +
>PCI_MSIX_FLAGS, );
>+  ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
>+  pci_write_config_word(adev->pdev, adev->pdev->msix_cap +
>PCI_MSIX_FLAGS, ctrl);
>+  ctrl |= PCI_MSIX_FLAGS_ENABLE;
>+  pci_write_config_word(adev->pdev, adev->pdev->msix_cap +
>+PCI_MSIX_FLAGS, ctrl); }
>+
> /**
>  * amdgpu_irq_init - initialize interrupt handling
>  *
>@@ -558,6 +569,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct
>amdgpu_device *adev)  {
>   int i, j, k;
>
>+  amdgpu_irq_restore_msix(adev);
>   for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
>   if (!adev->irq.client[i].sources)
>   continue;
>--
>2.25.1
>
>___
>amd-gfx mailing list
>amd-gfx@lists.freedesktop.org
>https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.fr
>eedesktop.org%2Fmailman%2Flistinfo%2Famd-
>gfxdata=04%7C01%7Cguchun.chen%40amd.com%7C6aff296c96104aef
>176208d8f3362acf%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C6
>37526761267513989%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMD
>AiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=B
>G4P%2FbJmn8PiLR%2BxTys8cVWK6924LWftjTXjKqrgnkg%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 5/6] drm/amdgpu: Disable RPTR write back for navi12

2021-03-30 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

>-Original Message-
>From: Christian König 
>Sent: Tuesday, March 30, 2021 3:24 PM
>To: Deng, Emily ; amd-gfx@lists.freedesktop.org;
>Deucher, Alexander 
>Subject: Re: [PATCH 5/6] drm/amdgpu: Disable RPTR write back for navi12
>
>Am 30.03.21 um 09:20 schrieb Deng, Emily:
>> [AMD Official Use Only - Internal Distribution Only]
>>
>>> -Original Message-
>>> From: Christian König 
>>> Sent: Tuesday, March 30, 2021 3:13 PM
>>> To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH 5/6] drm/amdgpu: Disable RPTR write back for
>>> navi12
>>>
>>>
>>>
>>> Am 30.03.21 um 06:41 schrieb Emily Deng:
 It will hit ramdomly sdma hang, and pending on utcl2 address
 translation when access the RPTR polling address.

 According sdma firmware team mentioned, the RPTR writeback is done
 by hardware automatically, and will hit issue when clock gating occurs.
 So stop using the rptr write back for sdma5.0.

 Signed-off-by: Emily Deng 
 ---
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 18 --
1 file changed, 12 insertions(+), 6 deletions(-)

 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
 b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
 index 920fc6d4a127..63e4a78181b8 100644
 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
 +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
 @@ -298,13 +298,19 @@ static void
>>> sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
 */
static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
{
 -u64 *rptr;
 +struct amdgpu_device *adev = ring->adev;
 +u64 rptr;
 +u32 lowbit, highbit;
 +
 +lowbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me,
>>> mmSDMA0_GFX_RB_RPTR));
 +highbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me,
 +mmSDMA0_GFX_RB_RPTR_HI));
>>> That won't work like this.
>>>
>>> We have the readpointer writeback because we otherwise can't
>>> guarantee that the two 32bit values read from the registers are coherent.
>>>
>>> In other words it can be that the hi rptr is already wrapped around
>>> while the lo is still the old value.
>>>
>>> Why exactly doesn't the writeback work?
>>>
>>> Christian.
>> Issue occurs, when occurs clockgating, at the same time, the rptr write back
>occurs. At this time, the utcl2 translation will hang.
>
>Mhm, crap. Alex are you up to date on this bug?
>
>I'm not an expert on the SDMA, but my last status is that writeback is
>mandatory when we use 64bit rptr/wptr.
>
>Otherwise we need a workaround how to read a consistent 64bit rptr from
>two 32bit registers.
>
>Can you check the register documentation if there is any double buffering or
>stuff like that?
>
>Christian.
Hi Christian,
 Thanks to point out the inconsistent issue for 64 bit register. Please 
ignore this patch. Will try to fix the issue in sdma firmware.

Best wishes
Emily Deng


>
 -/* XXX check if swapping is necessary on BE */ -rptr = ((u64
 *)>adev->wb.wb[ring->rptr_offs]);
 +rptr = highbit;
 +rptr = rptr << 32;
 +rptr |= lowbit;

 -DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); -return
 ((*rptr) >> 2);
 +DRM_DEBUG("rptr before shift == 0x%016llx\n", rptr); return (rptr
 +>> 2);
}

/**
 @@ -702,7 +708,7 @@ static int sdma_v5_0_gfx_resume(struct
>>> amdgpu_device *adev)
WREG32(sdma_v5_0_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_RB_RPTR_ADDR_LO),
   lower_32_bits(adev->wb.gpu_addr + wb_offset) &
>>> 0xFFFC);
 -rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
>>> RPTR_WRITEBACK_ENABLE, 1);
 +rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
 +RPTR_WRITEBACK_ENABLE, 0);

WREG32(sdma_v5_0_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
WREG32(sdma_v5_0_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_RB_BASE_HI),
 ring->gpu_addr >> 40);

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 5/6] drm/amdgpu: Disable RPTR write back for navi12

2021-03-30 Thread Christian König

Am 30.03.21 um 09:20 schrieb Deng, Emily:

[AMD Official Use Only - Internal Distribution Only]


-Original Message-
From: Christian König 
Sent: Tuesday, March 30, 2021 3:13 PM
To: Deng, Emily ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 5/6] drm/amdgpu: Disable RPTR write back for navi12



Am 30.03.21 um 06:41 schrieb Emily Deng:

It will hit ramdomly sdma hang, and pending on utcl2 address
translation when access the RPTR polling address.

According sdma firmware team mentioned, the RPTR writeback is done by
hardware automatically, and will hit issue when clock gating occurs.
So stop using the rptr write back for sdma5.0.

Signed-off-by: Emily Deng 
---
   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 18 --
   1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 920fc6d4a127..63e4a78181b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -298,13 +298,19 @@ static void

sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,

*/
   static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
   {
-u64 *rptr;
+struct amdgpu_device *adev = ring->adev;
+u64 rptr;
+u32 lowbit, highbit;
+
+lowbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me,

mmSDMA0_GFX_RB_RPTR));

+highbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me,
+mmSDMA0_GFX_RB_RPTR_HI));

That won't work like this.

We have the readpointer writeback because we otherwise can't guarantee
that the two 32bit values read from the registers are coherent.

In other words it can be that the hi rptr is already wrapped around while the
lo is still the old value.

Why exactly doesn't the writeback work?

Christian.

Issue occurs, when occurs clockgating, at the same time, the rptr write back 
occurs. At this time, the utcl2 translation will hang.


Mhm, crap. Alex are you up to date on this bug?

I'm not an expert on the SDMA, but my last status is that writeback is 
mandatory when we use 64bit rptr/wptr.


Otherwise we need a workaround how to read a consistent 64bit rptr from 
two 32bit registers.


Can you check the register documentation if there is any double 
buffering or stuff like that?


Christian.


-/* XXX check if swapping is necessary on BE */
-rptr = ((u64 *)>adev->wb.wb[ring->rptr_offs]);
+rptr = highbit;
+rptr = rptr << 32;
+rptr |= lowbit;

-DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
-return ((*rptr) >> 2);
+DRM_DEBUG("rptr before shift == 0x%016llx\n", rptr);
+return (rptr >> 2);
   }

   /**
@@ -702,7 +708,7 @@ static int sdma_v5_0_gfx_resume(struct

amdgpu_device *adev)

   WREG32(sdma_v5_0_get_reg_offset(adev, i,

mmSDMA0_GFX_RB_RPTR_ADDR_LO),

  lower_32_bits(adev->wb.gpu_addr + wb_offset) &

0xFFFC);

-rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,

RPTR_WRITEBACK_ENABLE, 1);

+rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+RPTR_WRITEBACK_ENABLE, 0);

   WREG32(sdma_v5_0_get_reg_offset(adev, i,

mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);

   WREG32(sdma_v5_0_get_reg_offset(adev, i,

mmSDMA0_GFX_RB_BASE_HI),

ring->gpu_addr >> 40);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re:Re: [PATCH] amd: display: dc: struct dc_state is declared twice

2021-03-30 Thread 万家兵
 
>On Sat, Mar 27, 2021 at 3:28 AM Wan Jiabing  wrote:
>>
>> struct dc_state has been declared at 273rd line.
>> Remove the duplicate.
>> Delete duplicate blank lines.
>
>Can you split these into separate patches?
>
>Alex

OK. But in fact, what I did  is simple.
The most important thing is removing the duplicate
struct dc_state declaration at 585th line.
Others are all deleting duplicate blank lines.

So maybe I should send two patchs, one is removing
duplicate declaration, the other is deleting blank lines?

>>
>> Signed-off-by: Wan Jiabing 
>> ---
>>  drivers/gpu/drm/amd/display/dc/dc.h | 10 --
>>  1 file changed, 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
>> b/drivers/gpu/drm/amd/display/dc/dc.h
>> index 18ed0d3f247e..dc667298ab5b 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dc.h
>> +++ b/drivers/gpu/drm/amd/display/dc/dc.h
>> @@ -234,7 +234,6 @@ struct dc_static_screen_params {
>> unsigned int num_frames;
>>  };
>>
>> -
>>  /* Surface update type is used by dc_update_surfaces_and_stream
>>   * The update type is determined at the very beginning of the function based
>>   * on parameters passed in and decides how much programming (or updating) is
>> @@ -272,7 +271,6 @@ struct dc;
>>  struct dc_plane_state;
>>  struct dc_state;
>>
>> -
>>  struct dc_cap_funcs {
>> bool (*get_dcc_compression_cap)(const struct dc *dc,
>> const struct dc_dcc_surface_param *input,
>> @@ -281,7 +279,6 @@ struct dc_cap_funcs {
>>
>>  struct link_training_settings;
>>
>> -
>>  /* Structure to hold configuration flags set by dm at dc creation. */
>>  struct dc_config {
>> bool gpu_vm_support;
>> @@ -581,7 +578,6 @@ struct dc_bounding_box_overrides {
>> int min_dcfclk_mhz;
>>  };
>>
>> -struct dc_state;

Removing the duplicate is here.
And others are all deleting duplicate blank line.

I think they are in the same file. I want to remove the declaration first.
By the way, I deleted the blank line.

Yours,
Wan Jiabing

>>  struct resource_pool;
>>  struct dce_hwseq;
>>  struct gpu_info_soc_bounding_box_v1_0;
>> @@ -757,7 +753,6 @@ enum dc_transfer_func_predefined {
>> TRANSFER_FUNCTION_GAMMA26
>>  };
>>
>> -
>>  struct dc_transfer_func {
>> struct kref refcount;
>> enum dc_transfer_func_type type;
>> @@ -770,7 +765,6 @@ struct dc_transfer_func {
>> };
>>  };
>>
>> -
>>  union dc_3dlut_state {
>> struct {
>> uint32_t initialized:1; /*if 3dlut is went through 
>> color module for initialization */
>> @@ -784,7 +778,6 @@ union dc_3dlut_state {
>> uint32_t raw;
>>  };
>>
>> -
>>  struct dc_3dlut {
>> struct kref refcount;
>> struct tetrahedral_params lut_3d;
>> @@ -1014,7 +1007,6 @@ enum dc_status dc_validate_global_state(
>> struct dc_state *new_ctx,
>> bool fast_validate);
>>
>> -
>>  void dc_resource_state_construct(
>> const struct dc *dc,
>> struct dc_state *dst_ctx);
>> @@ -1167,7 +1159,6 @@ struct dc_container_id {
>> unsigned short productCode;
>>  };
>>
>> -
>>  struct dc_sink_dsc_caps {
>> // 'true' if these are virtual DPCD's DSC caps (immediately upstream 
>> of sink in MST topology),
>> // 'false' if they are sink's DSC caps
>> @@ -1229,7 +1220,6 @@ struct dc_cursor {
>> struct dc_cursor_attributes attributes;
>>  };
>>
>> -
>>  
>> /***
>>   * Interrupt interfaces
>>   
>> **/
>> --
>> 2.25.1
>>
>> ___
>> dri-devel mailing list
>> dri-de...@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 5/6] drm/amdgpu: Disable RPTR write back for navi12

2021-03-30 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

>-Original Message-
>From: Christian König 
>Sent: Tuesday, March 30, 2021 3:13 PM
>To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH 5/6] drm/amdgpu: Disable RPTR write back for navi12
>
>
>
>Am 30.03.21 um 06:41 schrieb Emily Deng:
>> It will hit ramdomly sdma hang, and pending on utcl2 address
>> translation when access the RPTR polling address.
>>
>> According sdma firmware team mentioned, the RPTR writeback is done by
>> hardware automatically, and will hit issue when clock gating occurs.
>> So stop using the rptr write back for sdma5.0.
>>
>> Signed-off-by: Emily Deng 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 18 --
>>   1 file changed, 12 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> index 920fc6d4a127..63e4a78181b8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> @@ -298,13 +298,19 @@ static void
>sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
>>*/
>>   static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
>>   {
>> -u64 *rptr;
>> +struct amdgpu_device *adev = ring->adev;
>> +u64 rptr;
>> +u32 lowbit, highbit;
>> +
>> +lowbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me,
>mmSDMA0_GFX_RB_RPTR));
>> +highbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me,
>> +mmSDMA0_GFX_RB_RPTR_HI));
>
>That won't work like this.
>
>We have the readpointer writeback because we otherwise can't guarantee
>that the two 32bit values read from the registers are coherent.
>
>In other words it can be that the hi rptr is already wrapped around while the
>lo is still the old value.
>
>Why exactly doesn't the writeback work?
>
>Christian.
Issue occurs, when occurs clockgating, at the same time, the rptr write back 
occurs. At this time, the utcl2 translation will hang.
>
>>
>> -/* XXX check if swapping is necessary on BE */
>> -rptr = ((u64 *)>adev->wb.wb[ring->rptr_offs]);
>> +rptr = highbit;
>> +rptr = rptr << 32;
>> +rptr |= lowbit;
>>
>> -DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
>> -return ((*rptr) >> 2);
>> +DRM_DEBUG("rptr before shift == 0x%016llx\n", rptr);
>> +return (rptr >> 2);
>>   }
>>
>>   /**
>> @@ -702,7 +708,7 @@ static int sdma_v5_0_gfx_resume(struct
>amdgpu_device *adev)
>>   WREG32(sdma_v5_0_get_reg_offset(adev, i,
>mmSDMA0_GFX_RB_RPTR_ADDR_LO),
>>  lower_32_bits(adev->wb.gpu_addr + wb_offset) &
>0xFFFC);
>>
>> -rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
>RPTR_WRITEBACK_ENABLE, 1);
>> +rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
>> +RPTR_WRITEBACK_ENABLE, 0);
>>
>>   WREG32(sdma_v5_0_get_reg_offset(adev, i,
>mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
>>   WREG32(sdma_v5_0_get_reg_offset(adev, i,
>mmSDMA0_GFX_RB_BASE_HI),
>> ring->gpu_addr >> 40);

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 5/6] drm/amdgpu: Disable RPTR write back for navi12

2021-03-30 Thread Christian König




Am 30.03.21 um 06:41 schrieb Emily Deng:

It will hit ramdomly sdma hang, and pending on utcl2
address translation when access the RPTR polling address.

According sdma firmware team mentioned, the RPTR writeback is done by
hardware automatically, and will hit issue when clock gating occurs. So
stop using the rptr write back for sdma5.0.

Signed-off-by: Emily Deng 
---
  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 18 --
  1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 920fc6d4a127..63e4a78181b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -298,13 +298,19 @@ static void sdma_v5_0_ring_patch_cond_exec(struct 
amdgpu_ring *ring,
   */
  static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
  {
-   u64 *rptr;
+   struct amdgpu_device *adev = ring->adev;
+   u64 rptr;
+   u32 lowbit, highbit;
+
+   lowbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, 
mmSDMA0_GFX_RB_RPTR));
+   highbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, 
mmSDMA0_GFX_RB_RPTR_HI));


That won't work like this.

We have the readpointer writeback because we otherwise can't guarantee 
that the two 32bit values read from the registers are coherent.


In other words it can be that the hi rptr is already wrapped around 
while the lo is still the old value.


Why exactly doesn't the writeback work?

Christian.

  
-	/* XXX check if swapping is necessary on BE */

-   rptr = ((u64 *)>adev->wb.wb[ring->rptr_offs]);
+   rptr = highbit;
+   rptr = rptr << 32;
+   rptr |= lowbit;
  
-	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);

-   return ((*rptr) >> 2);
+   DRM_DEBUG("rptr before shift == 0x%016llx\n", rptr);
+   return (rptr >> 2);
  }
  
  /**

@@ -702,7 +708,7 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
WREG32(sdma_v5_0_get_reg_offset(adev, i, 
mmSDMA0_GFX_RB_RPTR_ADDR_LO),
   lower_32_bits(adev->wb.gpu_addr + wb_offset) & 
0xFFFC);
  
-		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);

+   rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, 
RPTR_WRITEBACK_ENABLE, 0);
  
  		WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);

WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), 
ring->gpu_addr >> 40);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

2021-03-30 Thread Christian König

Good morning,

yes Jiansong is right that patch is really not a good idea.

Moving buffers can indeed happen during shutdown while some memory is 
still referenced.


Just ignoring the move is not the right approach, you need to find out 
why the memory is moved in the first place.


You could add something like WARN_ON(adev->shutdown);

Regards,
Christian.

Am 30.03.21 um 09:05 schrieb Deng, Emily:

[AMD Official Use Only - Internal Distribution Only]

Hi Jiansong,
  It does happen,  maybe have the race condition?


Best wishes
Emily Deng




-Original Message-
From: Chen, Jiansong (Simon) 
Sent: Tuesday, March 30, 2021 2:49 PM
To: Deng, Emily ; amd-gfx@lists.freedesktop.org
Cc: Deng, Emily 
Subject: RE: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

[AMD Official Use Only - Internal Distribution Only]

I still wonder how the issue takes place? According to my humble knowledge
in driver model, the reference count of the kobject for the device will not
reach zero when there is still some device mem access, and shutdown should
not happen.

Regards,
Jiansong
-Original Message-
From: amd-gfx  On Behalf Of Emily
Deng
Sent: Tuesday, March 30, 2021 12:42 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deng, Emily 
Subject: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

During driver unloading, don't need to copy mem, or it will introduce some
call trace, such as when sa_manager is freed, it will introduce warn call trace
in amdgpu_sa_bo_new.

Signed-off-by: Emily Deng 
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e00263bcc88b..f0546a489e0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -317,6 +317,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct
amdgpu_device *adev,  struct dma_fence *fence = NULL;  int r = 0;

+if (adev->shutdown)
+return 0;
+
if (!adev->mman.buffer_funcs_enabled) {  DRM_ERROR("Trying to move
memory with ring turned off.\n");  return -EINVAL;
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.fr
eedesktop.org%2Fmailman%2Flistinfo%2Famd-
gfxdata=04%7C01%7CJiansong.Chen%40amd.com%7C1b4c71d7b96247
6a367508d8f3362f40%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7
C637526761354532311%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAw
MDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdat
a=RxRnZW0fmwjKSGMN1nf6kIHRdAPVs9J5OBluDYhR6vQ%3Dreserved
=0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

2021-03-30 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

Hi Jiansong,
 It does happen,  maybe have the race condition?


Best wishes
Emily Deng



>-Original Message-
>From: Chen, Jiansong (Simon) 
>Sent: Tuesday, March 30, 2021 2:49 PM
>To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily 
>Subject: RE: [PATCH 6/6] drm/amdgpu: Fix driver unload issue
>
>[AMD Official Use Only - Internal Distribution Only]
>
>I still wonder how the issue takes place? According to my humble knowledge
>in driver model, the reference count of the kobject for the device will not
>reach zero when there is still some device mem access, and shutdown should
>not happen.
>
>Regards,
>Jiansong
>-Original Message-
>From: amd-gfx  On Behalf Of Emily
>Deng
>Sent: Tuesday, March 30, 2021 12:42 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily 
>Subject: [PATCH 6/6] drm/amdgpu: Fix driver unload issue
>
>During driver unloading, don't need to copy mem, or it will introduce some
>call trace, such as when sa_manager is freed, it will introduce warn call trace
>in amdgpu_sa_bo_new.
>
>Signed-off-by: Emily Deng 
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
> 1 file changed, 3 insertions(+)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>index e00263bcc88b..f0546a489e0d 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>@@ -317,6 +317,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct
>amdgpu_device *adev,  struct dma_fence *fence = NULL;  int r = 0;
>
>+if (adev->shutdown)
>+return 0;
>+
> if (!adev->mman.buffer_funcs_enabled) {  DRM_ERROR("Trying to move
>memory with ring turned off.\n");  return -EINVAL;
>--
>2.25.1
>
>___
>amd-gfx mailing list
>amd-gfx@lists.freedesktop.org
>https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.fr
>eedesktop.org%2Fmailman%2Flistinfo%2Famd-
>gfxdata=04%7C01%7CJiansong.Chen%40amd.com%7C1b4c71d7b96247
>6a367508d8f3362f40%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7
>C637526761354532311%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAw
>MDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdat
>a=RxRnZW0fmwjKSGMN1nf6kIHRdAPVs9J5OBluDYhR6vQ%3Dreserved
>=0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: 回复: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

2021-03-30 Thread Christian König

Hi Monk,

yeah, that's what I can certainly agree on.

My primary concern is that I'm not convinced that we don't get problems 
at other places if we just add another band aid.


We already had this back and forth multiple times now and while we are 
currently under time pressure we will be under even more time pressure 
when a customer is running into other issues and we are still circling 
around the same fundamental problem.


Regards,
Christian.

Am 30.03.21 um 05:10 schrieb Liu, Monk:

[AMD Official Use Only - Internal Distribution Only]

Hi Christian,

We don't need to debate on the design's topic, each of us have our own opinion, 
it is hard to persuade others sometimes, again with more and more features and 
requirements it is pretty normal that an old design need to
Refine and or even rework to satisfy all those needs, so I'm not trying to 
argue with you that we don't need a better rework, that's also pleasure me .

In the moment, the more important thing I care is the solution because SRIOV 
project still try best to put all changes into upstreaming tree, we don't want 
to fork another tree unless no choice ...

Let's have a sync in another thread

Thanks for you help on this

--
Monk Liu | Cloud-GPU Core team
--

-Original Message-
From: Koenig, Christian 
Sent: Friday, March 26, 2021 10:51 PM
To: Liu, Monk ; Zhang, Jack (Jian) ; Grodzovsky, Andrey 
; Christian König ; dri-de...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org; Deng, Emily ; Rob Herring ; Tomeu Vizoso 
; Steven Price 
Cc: Zhang, Andy ; Jiang, Jerry (SW) 
Subject: Re: 回复: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

Hi Monk,

I can't disagree more.

The fundamental problem here is that we have pushed a design without validating 
if it really fits into the concepts the Linux kernel mandates here.

My mistake was that I haven't pushed back hard enough on the initial design 
resulting in numerous cycles of trying to save the design while band aiding the 
flaws which became obvious after a while.

I haven't counted them but I think we are now already had over 10 patches which 
try to work around lifetime issues of the job object because I wasn't able to 
properly explain why this isn't going to work like this.

Because of this I will hard reject any attempt to band aid this issue even more 
which isn't starting over again with a design which looks like it is going to 
work.

Regards,
Christian.

Am 26.03.21 um 12:21 schrieb Liu, Monk:

[AMD Official Use Only - Internal Distribution Only]

Hi Christian

This is not correct or correct perspective, any design comes with its
pros and cons, otherwise it wouldn't comes to kernel tree in the very
beginning , it is just with time passed we have more and more
requirement and feature need to implement And those new requirement
drags many new solution or idea, and some idea you prefer need to
based on a new infrastructure, that's all

I don't why the job "should be" or not "should be" in the scheduler,
honestly speaking I can argue with you that the "scheduler" and the TDR feature which 
invented by AMD developer "should" never escalate to drm layer at all and by that 
assumption Those vendor's compatibilities headache right now won't happen at all.

Let's just focus on the issue so far.

The solution Andrey and Jack doing right now looks good to me, and it
can solve our problems without introducing regression from a surface
look, but it is fine if you need a neat solution,  since we have our
project pressure (which we always have) Either we implement the first
version with Jack's patch and do the revise in another series of
patches (that also my initial suggestion) or we rework anything you
mentioned, but since looks to me you are from time to time asking
people to rework Something in the stage that people already have a
solution, which frustrated people a lot,

I would like you do prepare a solution for us, which solves our
headaches ...  I really don't want to see you asked Jack to rework again and 
again If you are out of bandwidth or no interest in doing this ,please at least 
make your solution/proposal very detail and clear, jack told me he couldn't 
understand your point here.

Thanks very much, and please understand our painful here

/Monk


-邮件原件-
发件人: Koenig, Christian 
发送时间: 2021年3月26日 17:06
收件人: Zhang, Jack (Jian) ; Grodzovsky, Andrey
; Christian König
; dri-de...@lists.freedesktop.org;
amd-gfx@lists.freedesktop.org; Liu, Monk ; Deng,
Emily ; Rob Herring ; Tomeu
Vizoso ; Steven Price

主题: Re: [PATCH v3] drm/scheduler re-insert Bailing job to avoid
memleak

Hi guys,

Am 26.03.21 um 03:23 schrieb Zhang, Jack (Jian):

[AMD Official Use Only - Internal Distribution Only]

Hi, Andrey,


how u handle non guilty singnaled jobs in drm_sched_stop, currently
looks like you don't call put for them and just explicitly free
them as before

Good point, I missed that place. 

RE: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

2021-03-30 Thread Chen, Jiansong (Simon)
[AMD Official Use Only - Internal Distribution Only]

I still wonder how the issue takes place? According to my humble knowledge in 
driver model, the reference count of the kobject
for the device will not reach zero when there is still some device mem access, 
and shutdown should not happen.

Regards,
Jiansong
-Original Message-
From: amd-gfx  On Behalf Of Emily Deng
Sent: Tuesday, March 30, 2021 12:42 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deng, Emily 
Subject: [PATCH 6/6] drm/amdgpu: Fix driver unload issue

During driver unloading, don't need to copy mem, or it will introduce some call 
trace, such as when sa_manager is freed, it will introduce warn call trace in 
amdgpu_sa_bo_new.

Signed-off-by: Emily Deng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e00263bcc88b..f0546a489e0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -317,6 +317,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 struct dma_fence *fence = NULL;
 int r = 0;

+if (adev->shutdown)
+return 0;
+
 if (!adev->mman.buffer_funcs_enabled) {
 DRM_ERROR("Trying to move memory with ring turned off.\n");
 return -EINVAL;
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7CJiansong.Chen%40amd.com%7C1b4c71d7b962476a367508d8f3362f40%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637526761354532311%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=RxRnZW0fmwjKSGMN1nf6kIHRdAPVs9J5OBluDYhR6vQ%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx