[PATCH] drm/amdgpu: Poweroff uvd/vce/vcn/acp block if they were disabled by user
If user disable uvd/vce/vcn/acp blocks via module parameter ip_block_mask, driver power off thoser blocks to save power. Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1e4dd09..3ffee08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1774,6 +1774,24 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power for (j = 0; j < adev->num_ip_blocks; j++) { i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; + + /* try to power off VCE/UVD/VCN/ACP if they were disabled by user */ + if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_UVD || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_VCE || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_VCN || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) && + adev->ip_blocks[i].version->funcs->set_powergating_state) { + if (!adev->ip_blocks[i].status.valid) { + r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, + state); + if (r) { + DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + } + } + if (!adev->ip_blocks[i].status.late_initialized) continue; /* skip CG for VCE/UVD, it's handled specially */ @@ -1791,6 +1809,7 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power } } } + return 0; } -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 3/3] drm/amd/powerplay: update PPtable with DC BTC and Tvr SocLimit fields
Series reviewed by: Feifei Xu Regards, Feifei -Original Message- From: Evan Quan Sent: 2018年10月16日 10:52 To: amd-gfx@lists.freedesktop.org Cc: Xu, Feifei ; Deucher, Alexander ; Quan, Evan Subject: [PATCH 3/3] drm/amd/powerplay: update PPtable with DC BTC and Tvr SocLimit fields Update the PPtable structure to fit the latest SMC firmware. Change-Id: I97db5955085efa1ecf44ae23d26fdcc70ec2fc9a Signed-off-by: Evan Quan --- .../amd/powerplay/hwmgr/vega20_processpptables.c| 10 ++ drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h | 13 - 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c index e71740479bb8..e5f7f8230065 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c @@ -100,9 +100,8 @@ static void dump_pptable(PPTable_t *pptable) pr_info("PpmTemperatureThreshold = %d\n", pptable->PpmTemperatureThreshold); pr_info("MemoryOnPackage = 0x%02x\n", pptable->MemoryOnPackage); - pr_info("padding8_limits[0] = 0x%02x\n", pptable->padding8_limits[0]); - pr_info("padding8_limits[1] = 0x%02x\n", pptable->padding8_limits[1]); - pr_info("padding8_limits[2] = 0x%02x\n", pptable->padding8_limits[2]); + pr_info("padding8_limits = 0x%02x\n", pptable->padding8_limits); + pr_info("Tvr_SocLimit = %d\n", pptable->Tvr_SocLimit); pr_info("UlvVoltageOffsetSoc = %d\n", pptable->UlvVoltageOffsetSoc); pr_info("UlvVoltageOffsetGfx = %d\n", pptable->UlvVoltageOffsetGfx); @@ -539,7 +538,10 @@ static void dump_pptable(PPTable_t *pptable) pr_info("FanGainVrMem0 = %d\n", pptable->FanGainVrMem0); pr_info("FanGainVrMem0 = %d\n", pptable->FanGainVrMem0); - for (i = 0; i < 12; i++) + pr_info("DcBtcGb[AVFS_VOLTAGE_GFX] = 0x%x\n", pptable->DcBtcGb[AVFS_VOLTAGE_GFX]); + pr_info("DcBtcGb[AVFS_VOLTAGE_SOC] = 0x%x\n", +pptable->DcBtcGb[AVFS_VOLTAGE_SOC]); + + for (i = 0; i < 11; i++) pr_info("Reserved[%d] = 0x%x\n", i, pptable->Reserved[i]); for (i = 0; i < 3; i++) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h index c72cfab83df9..2998a49960ed 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h @@ -165,7 +165,7 @@ #define FEATURE_DS_FCLK_MASK(1 << FEATURE_DS_FCLK_BIT) #define FEATURE_DS_MP1CLK_MASK (1 << FEATURE_DS_MP1CLK_BIT ) #define FEATURE_DS_MP0CLK_MASK (1 << FEATURE_DS_MP0CLK_BIT ) - +#define FEATURE_XGMI_MASK (1 << FEATURE_XGMI_BIT ) #define DPM_OVERRIDE_DISABLE_SOCCLK_PID 0x0001 #define DPM_OVERRIDE_DISABLE_UCLK_PID 0x0002 @@ -391,8 +391,8 @@ typedef struct { uint16_t PpmTemperatureThreshold; uint8_t MemoryOnPackage; - uint8_t padding8_limits[3]; - + uint8_t padding8_limits; + uint16_t Tvr_SocLimit; uint16_t UlvVoltageOffsetSoc; uint16_t UlvVoltageOffsetGfx; @@ -501,7 +501,7 @@ typedef struct { uint8_t DcBtcEnabled[AVFS_VOLTAGE_COUNT]; uint8_t Padding8_GfxBtc[2]; - uint16_t DcBtcMin[AVFS_VOLTAGE_COUNT]; + int16_t DcBtcMin[AVFS_VOLTAGE_COUNT]; uint16_t DcBtcMax[AVFS_VOLTAGE_COUNT]; @@ -526,7 +526,10 @@ typedef struct { uint16_t FanGainVrMem0; uint16_t FanGainVrMem1; - uint32_t Reserved[12]; + + uint16_t DcBtcGb[AVFS_VOLTAGE_COUNT]; + + uint32_t Reserved[11]; uint32_t Padding32[3]; -- 2.19.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/3] drm/amd/powerplay: added I2C controller configuration
PPTABLE structure is stretched to add I2C controller configuration. Hold on the PPTABLE_V20_SMU_VERSION bump until the VBIOS is ready. Change-Id: I079154b36e4bddba9fa40ce3abc6517ad9e9b5f1 Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/include/atomfirmware.h| 88 ++ .../powerplay/hwmgr/vega20_processpptables.c | 94 +-- .../drm/amd/powerplay/inc/smu11_driver_if.h | 108 +- 3 files changed, 227 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 8ae7adb7329b..d2e7c0fa96c2 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1532,6 +1532,94 @@ struct atom_smc_dpm_info_v4_3 uint32_t boardreserved[10]; }; +struct smudpm_i2ccontrollerconfig_t { + uint32_t enabled; + uint32_t slaveaddress; + uint32_t controllerport; + uint32_t controllername; + uint32_t thermalthrottler; + uint32_t i2cprotocol; + uint32_t i2cspeed; +}; + +struct atom_smc_dpm_info_v4_4 +{ + struct atom_common_table_header table_header; + uint32_t i2c_padding[3]; + + uint16_t maxvoltagestepgfx; + uint16_t maxvoltagestepsoc; + + uint8_t vddgfxvrmapping; + uint8_t vddsocvrmapping; + uint8_t vddmem0vrmapping; + uint8_t vddmem1vrmapping; + + uint8_t gfxulvphasesheddingmask; + uint8_t soculvphasesheddingmask; + uint8_t externalsensorpresent; + uint8_t padding8_v; + + uint16_t gfxmaxcurrent; + uint8_t gfxoffset; + uint8_t padding_telemetrygfx; + + uint16_t socmaxcurrent; + uint8_t socoffset; + uint8_t padding_telemetrysoc; + + uint16_t mem0maxcurrent; + uint8_t mem0offset; + uint8_t padding_telemetrymem0; + + uint16_t mem1maxcurrent; + uint8_t mem1offset; + uint8_t padding_telemetrymem1; + + + uint8_t acdcgpio; + uint8_t acdcpolarity; + uint8_t vr0hotgpio; + uint8_t vr0hotpolarity; + + uint8_t vr1hotgpio; + uint8_t vr1hotpolarity; + uint8_t padding1; + uint8_t padding2; + + + uint8_t ledpin0; + uint8_t ledpin1; + uint8_t ledpin2; + uint8_t padding8_4; + + + uint8_t pllgfxclkspreadenabled; + uint8_t pllgfxclkspreadpercent; + uint16_t pllgfxclkspreadfreq; + + + uint8_t uclkspreadenabled; + uint8_t uclkspreadpercent; + uint16_t uclkspreadfreq; + + + uint8_t fclkspreadenabled; + uint8_t fclkspreadpercent; + uint16_t fclkspreadfreq; + + + uint8_t fllgfxclkspreadenabled; + uint8_t fllgfxclkspreadpercent; + uint16_t fllgfxclkspreadfreq; + + + struct smudpm_i2ccontrollerconfig_t i2ccontrollers[7]; + + + uint32_t boardreserved[10]; +}; + /* *** Data Table asic_profiling_info structure diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c index 32fe38452094..e71740479bb8 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c @@ -417,8 +417,8 @@ static void dump_pptable(PPTable_t *pptable) pr_info("FanGainEdge = %d\n", pptable->FanGainEdge); pr_info("FanGainHotspot = %d\n", pptable->FanGainHotspot); pr_info("FanGainLiquid = %d\n", pptable->FanGainLiquid); - pr_info("FanGainVrVddc = %d\n", pptable->FanGainVrVddc); - pr_info("FanGainVrMvdd = %d\n", pptable->FanGainVrMvdd); + pr_info("FanGainVrGfx = %d\n", pptable->FanGainVrGfx); + pr_info("FanGainVrSoc = %d\n", pptable->FanGainVrSoc); pr_info("FanGainPlx = %d\n", pptable->FanGainPlx); pr_info("FanGainHbm = %d\n", pptable->FanGainHbm); pr_info("FanPwmMin = %d\n", pptable->FanPwmMin); @@ -533,23 +533,17 @@ static void dump_pptable(PPTable_t *pptable) pr_info("MinVoltageUlvGfx = %d\n", pptable->MinVoltageUlvGfx); pr_info("MinVoltageUlvSoc = %d\n", pptable->MinVoltageUlvSoc); - for (i = 0; i < 14; i++) - pr_info("Reserved[%d] = 0x%x\n", i, pptable->Reserved[i]); + pr_info("MGpuFanBoostLimitRpm = %d\n", pptable->MGpuFanBoostLimitRpm); + pr_info("padding16_Fan = %d\n", pptable->padding16_Fan); - pr_info("Liquid1_I2C_address = 0x%x\n", pptable->Liquid1_I2C_address); - pr_info("Liquid2_I2C_address = 0x%x\n", pptable->Liquid2_I2C_address); - pr_info("Vr_I2C_address = 0x%x\n", pptable->Vr_I2C_address); - pr_info("Plx_I2C_address = 0x%x\n", pptable->Plx_I2C_address); + pr_info("FanGainVrMem0 = %d\n", pptable->FanGainVrMem0); + pr_info("FanGainVrMem0 = %d\n", pptable->FanGainVrMem0); - pr_info("Liquid_I2C_LineSCL = 0x%x\n", pptable->Liquid_I2C_LineSCL); - pr_info("Liquid_I2C_LineSDA = 0x%x\n", pptable->Liquid_I2C_LineSDA); - pr_info("Vr_I2C_LineSCL = 0x%x\n", pptable->Vr_I2C_LineSCL); - pr_info("Vr_I2C_LineSDA = 0x%x\n", pptable->Vr_I2C_LineSDA); + for (i = 0; i < 12; i++) +
[PATCH 3/3] drm/amd/powerplay: update PPtable with DC BTC and Tvr SocLimit fields
Update the PPtable structure to fit the latest SMC firmware. Change-Id: I97db5955085efa1ecf44ae23d26fdcc70ec2fc9a Signed-off-by: Evan Quan --- .../amd/powerplay/hwmgr/vega20_processpptables.c| 10 ++ drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h | 13 - 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c index e71740479bb8..e5f7f8230065 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c @@ -100,9 +100,8 @@ static void dump_pptable(PPTable_t *pptable) pr_info("PpmTemperatureThreshold = %d\n", pptable->PpmTemperatureThreshold); pr_info("MemoryOnPackage = 0x%02x\n", pptable->MemoryOnPackage); - pr_info("padding8_limits[0] = 0x%02x\n", pptable->padding8_limits[0]); - pr_info("padding8_limits[1] = 0x%02x\n", pptable->padding8_limits[1]); - pr_info("padding8_limits[2] = 0x%02x\n", pptable->padding8_limits[2]); + pr_info("padding8_limits = 0x%02x\n", pptable->padding8_limits); + pr_info("Tvr_SocLimit = %d\n", pptable->Tvr_SocLimit); pr_info("UlvVoltageOffsetSoc = %d\n", pptable->UlvVoltageOffsetSoc); pr_info("UlvVoltageOffsetGfx = %d\n", pptable->UlvVoltageOffsetGfx); @@ -539,7 +538,10 @@ static void dump_pptable(PPTable_t *pptable) pr_info("FanGainVrMem0 = %d\n", pptable->FanGainVrMem0); pr_info("FanGainVrMem0 = %d\n", pptable->FanGainVrMem0); - for (i = 0; i < 12; i++) + pr_info("DcBtcGb[AVFS_VOLTAGE_GFX] = 0x%x\n", pptable->DcBtcGb[AVFS_VOLTAGE_GFX]); + pr_info("DcBtcGb[AVFS_VOLTAGE_SOC] = 0x%x\n", pptable->DcBtcGb[AVFS_VOLTAGE_SOC]); + + for (i = 0; i < 11; i++) pr_info("Reserved[%d] = 0x%x\n", i, pptable->Reserved[i]); for (i = 0; i < 3; i++) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h index c72cfab83df9..2998a49960ed 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h @@ -165,7 +165,7 @@ #define FEATURE_DS_FCLK_MASK(1 << FEATURE_DS_FCLK_BIT) #define FEATURE_DS_MP1CLK_MASK (1 << FEATURE_DS_MP1CLK_BIT ) #define FEATURE_DS_MP0CLK_MASK (1 << FEATURE_DS_MP0CLK_BIT ) - +#define FEATURE_XGMI_MASK (1 << FEATURE_XGMI_BIT ) #define DPM_OVERRIDE_DISABLE_SOCCLK_PID 0x0001 #define DPM_OVERRIDE_DISABLE_UCLK_PID 0x0002 @@ -391,8 +391,8 @@ typedef struct { uint16_t PpmTemperatureThreshold; uint8_t MemoryOnPackage; - uint8_t padding8_limits[3]; - + uint8_t padding8_limits; + uint16_t Tvr_SocLimit; uint16_t UlvVoltageOffsetSoc; uint16_t UlvVoltageOffsetGfx; @@ -501,7 +501,7 @@ typedef struct { uint8_t DcBtcEnabled[AVFS_VOLTAGE_COUNT]; uint8_t Padding8_GfxBtc[2]; - uint16_t DcBtcMin[AVFS_VOLTAGE_COUNT]; + int16_t DcBtcMin[AVFS_VOLTAGE_COUNT]; uint16_t DcBtcMax[AVFS_VOLTAGE_COUNT]; @@ -526,7 +526,10 @@ typedef struct { uint16_t FanGainVrMem0; uint16_t FanGainVrMem1; - uint32_t Reserved[12]; + + uint16_t DcBtcGb[AVFS_VOLTAGE_COUNT]; + + uint32_t Reserved[11]; uint32_t Padding32[3]; -- 2.19.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/3] drm/amdgpu: update Vega20 SDMA golden setting
Update SDMA golden settings. Change-Id: Icffa86e1a2c057466e1280fb195070c769d51eab Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index c20d413f277c..04fa3d972636 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -148,6 +148,7 @@ static const struct soc15_reg_golden golden_settings_sdma0_4_2[] = SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffd, 0x0001), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x03ff, 0x03c0), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xFE00, 0x), }; static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = { @@ -177,6 +178,7 @@ static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = { SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffd, 0x0001), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x03ff, 0x03c0), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xFE00, 0x), }; static const struct soc15_reg_golden golden_settings_sdma_rv1[] = -- 2.19.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [RFC] drm/amd/display: add SI support to AMD DC
On 2018-10-15 5:06 p.m., Harry Wentland wrote: > On 2018-10-14 5:47 p.m., Mauro Rossi wrote: >> Hi, >> >> reporting about some progress made during the weekend, >> thanks to Sylvain feedback & suggestions. >> >> I have rebased and updated the series on top of >> https://cgit.freedesktop.org/~agd5f/linux/?h=amd-staging-drm-next Here is the amd_dc_si branch: >> https://github.com/maurossi/linux/tree/amd_dc_si (uploading) >> NOTE: arch/x86/kernel/tsc.c changes for 4K display modes are not >> there, as they are not strictly needed for amd-gfx >> What updates do you have to your original series? If there are substantial updates can you send a v2 of your series? If not I'll go through the v1. The branch is great for review but email patches are easier for commenting. >> Copying also Harry, Alex, Christian and Mike in order to get some >> objective and infallible >> clues/feedbacks about blocking points and about "no care" items. >> >> Please, also big things I may have missed. >> M. >> >>> On Mon, Oct 8, 2018 at 11:23 PM wrote: >>> >>> Sylvain - I did hack a bit your patch set on amd-staging-drm-next to make >>> it go through the >>> asic init and I managed to get a x11 display with lines kind of garbled, but >>> you can still understand easily what's on the screen. >> >> I forgot to mention that since I'm gorgeously trying AMD DC also on Mullins >> I have reverted d9fda24 (""drm/amdgpu: Don't default to DC support for >> Kaveri and older") >> because on Mullins I can boot with HDMI and HDMI-to-VGA converter >> >> I was hoping for AMD DC being re-enabled for Kaveri and older, >> but I'm available to submit new version of specific patch if required. >> > > I still need to find time to get through your patchset properly. Just a quick > note on this. There are Kabini/Kaveri ASICs with VGA connectors in the > market, which the DC code doesn't support. If someone writes it we can > re-enable it by default. > > Either way, you can revert that patch for your tree or use amdgpu.dc=0 as > long as you're aware that VGA won't work with amdgpu on such a kernel. > > Harry > >>> Sylvain - ... The lines may be garbled in your driver code because, >>> if I recall properly, "line buffer" programing in dce8 is not >>> the same than in dce6 (look for registers with the "LB" abbreviation). Or >>> some >>> slight differences in frame buffer tiling. >> >> So the problem could be related to some kind of scan line or tiling >> buffer issue, >> at the moment the dce_resouces model is grabbed "AS IS" from DCE8 >> registers/masks >> We should probably update the DCE6 headers and use those. Sometimes register addresses change in subtle ways and cause problems later on that are hard to spot. >>> >>> Sylvain - I checked the kernel log, and like you said, I got errors in >>> DM_PPLIB due to an >>> invalid powerlevel and atombios/vbios table parsing regarding connectors. >>> general dpm is in amdgpu(no DC) for SI, it means the DCE related dpm part in >>> current SI amdgpu code path should be "copied" in DC. It is related very >>> probably to the parsing of VBIOS/ATOMBIOS tables. >> >> 10-09 21:10:14.427 0 0 E : >> [drm:dm_pp_get_static_clocks [amdgpu]] *ERROR* DM_PPLIB: invalid >> powerlevel state: 0! >> >> NOTE: the error is the result of Powerplay dependency introduced by >> using AMD DC for SI >> it's not fatal and it does not seem to affect performance in the Benchmarks >> >> DOUBT: I think that it would make sense to set "power level 0" i.e. >> the "lower state" as safe default, >> considering that powerplay smu6/hwmgr are not implemented for SI and >> smu7 CIK functions do not work, >> the AS-IS dpm is the only available option. (and it seams to be >> working, looking at the framerates 250-280 in the V1 Vulkan benchmark) >> I wouldn't worry about this too much for now. DC really just wants powerplay. Might make sense to just silence the error print if things seem to work otherwise. Great work in getting things up and running with DCE6 and thanks for sending patches to get this work upstreamed. Harry >> >> >>> 10-09 21:10:14.427 0 0 W [drm] dce110_link_encoder_construct: >>> Failed to get encoder_cap_info from VBIOS with error code 4! >>> 10-09 21:10:14.427 0 0 W [drm] dce110_link_encoder_construct: >>> Failed to get encoder_cap_info from VBIOS with error code 4! >> >> NOTE: the warning also appears with Tonga and Vega, it is a Warning >> and does not seem to cause issues, so I would assume there is a >> default treatment in place, >> is this related to missing encoder for drm crtc or to other kind of encoder? >> >>> Sylvain - Did add SI handling in some raven firmware loader function. >>> In drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c, "load_dmcu_fw" >>> function augmented with SI chip asic_type. >> >> I've merged the change in the (v2) branch >> https://github.com/maurossi/linux/tree/amd_dc_si >> >>> Sylvain - AFAIK, the real thing that you additionally get with
Re: [RFC] drm/amd/display: add SI support to AMD DC
On 2018-10-14 5:47 p.m., Mauro Rossi wrote: > Hi, > > reporting about some progress made during the weekend, > thanks to Sylvain feedback & suggestions. > > I have rebased and updated the series on top of > https://cgit.freedesktop.org/~agd5f/linux/?h=amd-staging-drm-next > > Here is the amd_dc_si branch: > https://github.com/maurossi/linux/tree/amd_dc_si (uploading) > NOTE: arch/x86/kernel/tsc.c changes for 4K display modes are not > there, as they are not strictly needed for amd-gfx > > Copying also Harry, Alex, Christian and Mike in order to get some > objective and infallible > clues/feedbacks about blocking points and about "no care" items. > > Please, also big things I may have missed. > M. > >> On Mon, Oct 8, 2018 at 11:23 PM wrote: >> >> Sylvain - I did hack a bit your patch set on amd-staging-drm-next to make it >> go through the >> asic init and I managed to get a x11 display with lines kind of garbled, but >> you can still understand easily what's on the screen. > > I forgot to mention that since I'm gorgeously trying AMD DC also on Mullins > I have reverted d9fda24 (""drm/amdgpu: Don't default to DC support for > Kaveri and older") > because on Mullins I can boot with HDMI and HDMI-to-VGA converter > > I was hoping for AMD DC being re-enabled for Kaveri and older, > but I'm available to submit new version of specific patch if required. > I still need to find time to get through your patchset properly. Just a quick note on this. There are Kabini/Kaveri ASICs with VGA connectors in the market, which the DC code doesn't support. If someone writes it we can re-enable it by default. Either way, you can revert that patch for your tree or use amdgpu.dc=0 as long as you're aware that VGA won't work with amdgpu on such a kernel. Harry >> Sylvain - ... The lines may be garbled in your driver code because, >> if I recall properly, "line buffer" programing in dce8 is not >> the same than in dce6 (look for registers with the "LB" abbreviation). Or >> some >> slight differences in frame buffer tiling. > > So the problem could be related to some kind of scan line or tiling > buffer issue, > at the moment the dce_resouces model is grabbed "AS IS" from DCE8 > registers/masks > >> >> Sylvain - I checked the kernel log, and like you said, I got errors in >> DM_PPLIB due to an >> invalid powerlevel and atombios/vbios table parsing regarding connectors. >> general dpm is in amdgpu(no DC) for SI, it means the DCE related dpm part in >> current SI amdgpu code path should be "copied" in DC. It is related very >> probably to the parsing of VBIOS/ATOMBIOS tables. > > 10-09 21:10:14.427 0 0 E : > [drm:dm_pp_get_static_clocks [amdgpu]] *ERROR* DM_PPLIB: invalid > powerlevel state: 0! > > NOTE: the error is the result of Powerplay dependency introduced by > using AMD DC for SI > it's not fatal and it does not seem to affect performance in the Benchmarks > > DOUBT: I think that it would make sense to set "power level 0" i.e. > the "lower state" as safe default, > considering that powerplay smu6/hwmgr are not implemented for SI and > smu7 CIK functions do not work, > the AS-IS dpm is the only available option. (and it seams to be > working, looking at the framerates 250-280 in the V1 Vulkan benchmark) > > > >> 10-09 21:10:14.427 0 0 W [drm] dce110_link_encoder_construct: Failed >> to get encoder_cap_info from VBIOS with error code 4! >> 10-09 21:10:14.427 0 0 W [drm] dce110_link_encoder_construct: Failed >> to get encoder_cap_info from VBIOS with error code 4! > > NOTE: the warning also appears with Tonga and Vega, it is a Warning > and does not seem to cause issues, so I would assume there is a > default treatment in place, > is this related to missing encoder for drm crtc or to other kind of encoder? > >> Sylvain - Did add SI handling in some raven firmware loader function. >> In drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c, "load_dmcu_fw" >> function augmented with SI chip asic_type. > > I've merged the change in the (v2) branch > https://github.com/maurossi/linux/tree/amd_dc_si > >> Sylvain - AFAIK, the real thing that you additionally get with DC is >> freesync. But >> freesync is actually going to be interesting only if displays are able to >> get their sync range lower bound to 0, and get significant power saving >> thanks to this. For the use case of very low display refresh rate I don't >> even >> think displayport or hdmi can do that, and be power friendly (you would have >> to retrain the link probably each time you send a framebuffer to the >> display). > > > If freesync is about reducing the framerate rate for power saving, > provided that I've seen it be mentioned the first time for GCN 2nd generation, > I'm not expecting freesync as a mandatory capability for the series. > >> Mauro -- dce60_resources was having too many building errors due to missing >> DCE6 macros >> in order to temporarily overcome the problem
Re: [RFC] drm/amd/display: add SI support to AMD DC
There are two sets of power management code in amdgpu, the older dpm code which was ported from radeon, and the newer powerplay code which was rewritten to align closer with the power management code for other OSes and the hw teams. The "powerplay" code has more features than the older dpm code, but at a fundamental level, they are pretty much the same. Both use the same SMU interfaces to support dynamic clock switching. SI is a bit tricky because it uses and older version of the SMU and an older power management design so it doesn't really match cleanly with the current APIs in the "powerplay" code. Alex From: sylvain.bertr...@gmail.com Sent: Monday, October 15, 2018 8:45 AM To: Mauro Rossi Cc: amd-gfx@lists.freedesktop.org; Wentland, Harry; Deucher, Alexander; Mike Lothian Subject: Re: [RFC] drm/amd/display: add SI support to AMD DC On Mon, Oct 15, 2018 at 07:28:57AM +0200, Mauro Rossi wrote: > dpm for SI is available, while powerplay for SI is not, but > display/amdgpu_dm uses some powerplay calls, where get_static_clock > functions not available and the *ERROR* DM_PPLIB is due to missing handling > in powerplay I though powerplay was just more power states for the dpm state machines to play with. I recall parsing ATOMBIOS pplay tables for records of (voltages/clocks frequencies). Maybe the future is to trust the OS kernel and perform all "low speed" 3D asic functions there and keep only high speed 3D operations in the asic. -- Sylvain ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v2 2/3] drm: Add variable refresh property to DRM CRTC
On 10/15/2018 09:57 AM, Pekka Paalanen wrote: On Fri, 12 Oct 2018 08:58:23 -0400 "Kazlauskas, Nicholas" wrote: On 10/12/2018 07:20 AM, Koenig, Christian wrote: Am 12.10.2018 um 11:21 schrieb Pekka Paalanen: On Fri, 12 Oct 2018 07:35:57 + "Koenig, Christian" wrote: Am 12.10.2018 um 09:23 schrieb Pekka Paalanen: On Wed, 10 Oct 2018 09:35:50 -0400 "Kazlauskas, Nicholas" wrote: The patches I've put out target this use case mostly because of the benefit for a relatively simple interface. VRR can and has been used in more ways that this, however. An example usecase that differs from this would actually be video playback. The monitor's refresh rate likely doesn't align with the video content rate. An API that exposes direct control over VRR (like the range, refresh duration, presentation timestamp) could allow the application to specify the content rate directly to deliver a smoother playback experience. For example, if you had a 24fps video and the VRR range was 40-60Hz you could target 48Hz via some API here. The way that has been discussed to be implemented is that DRM page flips would carry a target timestamp, which the driver will then meet as good as it can. It is better to define an absolute target timestamp than a frequency, because a timestamp can be used to synchronize with audio and more. Mario Kleiner can tell you all about scientific use cases that require accurate display timing, not just frequency. To summarize what information should be provided by the driver stack to make applications happy: 1. The minimum time a frame can be displayed, in other words the maximum frame rate. 2. The maximum time a frame can be displayed, in other words the minimum frame rate. 3. How much change of frame timing is allowed between frames to avoid luminescence flickering. Number 1 and 2 can also be used to signal the availability of VRR to applications, e.g. if they are identical we don't support VRR at all. Hi Christian, "the maximum time a frame can be displayed" is perhaps not an unambiguous definition. A frame can be shown indefinitely in any case. Good point. Please also note that I'm not an expert on the display stuff in general. My background comes more from implementing the VDPAU mediaplayer interface in mesa. So just throwing some ideas in here from the point of view of an userspace developer which wants to write a media player :) Excellent! The CRTC will simply start scanning out the same frame again if there is no new one. I understand what you want to say, but perhaps some different words will be in order. I do wonder if applications really want to know the maximum acceptable slew rate in timings... maybe that should be left for the drivers to apply. What I'm thinking is that we have the page flip timestamp with the page flip events to tell when the new FB became active. That information could be extended with a time range on when the very next flip could take place. Applications are already computing that prediction from the flip timestamp and fixed refresh rate, but it might be nice to give them the driver's opinion explicitly. Maybe the tolerable slew rate is not a constant. Well it depends. I agree that the kernel should probably enforce the slew rate to avoid flickering for the user. That's what I was hoping if the monitor hardware does not do that already, but now it sounds like it's not possible. Another failed assumption from my side. But it might be beneficial for the application to know things like this. Applications should know when they could likely flip, my question is how to tell them. Is the acceptable slew rate a constant for a video mode, or does it depend on the previous refresh interval. What the application definitely needs to know is when a frame was actually displayed. E.g. application says I want to display this at point X and at some point later the kernel returns saying the frame was displayed at point N where in the ideal case N=X. You already get this timestamp with the DRM page flip events. We also have Wayland and X11 extensions to get it to apps. Additional to that let's please use 64bit values and nanoseconds for every value we use here, and NOT fps, line numbers or similar :) Seconded! I'd really favour absolute timestamps even. Regards, Christian. Flickering will really depend on the panel itself. A wider ranger is more likely to exhibit the issue, but factors like topology, pixel density and other display technology can affect this. It can be hard for a driver to guess at all of this. For many panels it'd be difficult to notice unless it's consistently jumping between the min and max range. Do you mean that there is no way to know and get that information from the monitor itself? Nothing in EDID that could even be used as a default and quirk the monitors that got it wrong? The variable refresh range is exposed, but that's about it. There's certainly no simple algorithm you can feed monitor
Re: [PATCH 17/26] drm/amd/display: implement PERF_TRACE on Linux
Fully agreed. Will redesign this. From: Christian König Sent: October 13, 2018 1:40:24 PM To: Michel Dänzer; Lakha, Bhawanpreet Cc: Francis, David; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 17/26] drm/amd/display: implement PERF_TRACE on Linux Am 12.10.2018 um 18:56 schrieb Michel Dänzer: > On 2018-10-11 12:09 a.m., Bhawanpreet Lakha wrote: >> From: David Francis >> >> [Why] >> A quick-and-dirty way of getting performance data for the amdgpu >> driver would make performance improvements easier >> >> [How] >> The PERF_TRACE functionality is a tic-toc style debug method. >> Put PERF_TRACE calls on either side of the code you want to test. >> PERF_TRACE requires access to struct dc_context. PERF_TRACE() >> will pick up the CTX macro, and PERF_TRACE_CTX(struct dc_context) >> allows you to pass the context explicitly. >> >> The last 20 results can be read through the debugfs entry >> amdgpu_perf_trace. Each result contains the time in ns and >> number of GPU read/writes since the result before it. >> >> In my experimentation, each PERF_TRACE() call uses at most 700ns > Should this use the Linux tracing infrastructure? Yeah, agree that looks like reimplementing the tracing infrastructure to me as well. Christian. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
amd gpu hung with opensource driver
Hi All, When use amd radeon pro wx5100 with opensource driver(mesa17.2.8 , kernel v4.17, Ubuntu17.10), GPU hung happened: [260362.362835] amdgpu 0005:01:00.0: GPU fault detected: 146 0x02c0082c [260362.369366] amdgpu 0005:01:00.0: VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x00100058 [260362.377099] amdgpu 0005:01:00.0: VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x0400802C [260362.384781] amdgpu 0005:01:00.0: VM fault (0x2c, vmid 2, pasid 32792) at page 1048664, read from 'TC0' (0x54433000) (8) [260372.423402] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring gfx timeout, last signaled seq=132702874, last emitted seq=132702877 [260372.435267] [drm] IP block:gfx_v8_0 is hung! [260372.435276] [drm] GPU recovery disabled. It is happened accidentally when play games. How to debug and solve this problem ? Please help, Thanks : ) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v2 2/3] drm: Add variable refresh property to DRM CRTC
On Fri, 12 Oct 2018 08:58:23 -0400 "Kazlauskas, Nicholas" wrote: > On 10/12/2018 07:20 AM, Koenig, Christian wrote: > > Am 12.10.2018 um 11:21 schrieb Pekka Paalanen: > >> On Fri, 12 Oct 2018 07:35:57 + > >> "Koenig, Christian" wrote: > >> > >>> Am 12.10.2018 um 09:23 schrieb Pekka Paalanen: > On Wed, 10 Oct 2018 09:35:50 -0400 > "Kazlauskas, Nicholas" wrote: > > > The patches I've put out target this use case mostly because of the > > benefit for a relatively simple interface. VRR can and has been used in > > more ways that this, however. > > > > An example usecase that differs from this would actually be video > > playback. The monitor's refresh rate likely doesn't align with the video > > content rate. An API that exposes direct control over VRR (like the > > range, refresh duration, presentation timestamp) could allow the > > application to specify the content rate directly to deliver a smoother > > playback experience. For example, if you had a 24fps video and the VRR > > range was 40-60Hz you could target 48Hz via some API here. > The way that has been discussed to be implemented is that DRM page flips > would carry a target timestamp, which the driver will then meet as good > as it can. It is better to define an absolute target timestamp than a > frequency, because a timestamp can be used to synchronize with audio > and more. Mario Kleiner can tell you all about scientific use cases > that require accurate display timing, not just frequency. > >>> To summarize what information should be provided by the driver stack to > >>> make applications happy: > >>> > >>> 1. The minimum time a frame can be displayed, in other words the maximum > >>> frame rate. > >>> 2. The maximum time a frame can be displayed, in other words the minimum > >>> frame rate. > >>> 3. How much change of frame timing is allowed between frames to avoid > >>> luminescence flickering. > >>> > >>> Number 1 and 2 can also be used to signal the availability of VRR to > >>> applications, e.g. if they are identical we don't support VRR at all. > >> Hi Christian, > >> > >> "the maximum time a frame can be displayed" is perhaps not an > >> unambiguous definition. A frame can be shown indefinitely in any case. > > > > Good point. Please also note that I'm not an expert on the display stuff > > in general. > > > > My background comes more from implementing the VDPAU mediaplayer > > interface in mesa. > > > > So just throwing some ideas in here from the point of view of an > > userspace developer which wants to write a media player :) Excellent! > >> The CRTC will simply start scanning out the same frame again if there > >> is no new one. I understand what you want to say, but perhaps some > >> different words will be in order. > >> > >> I do wonder if applications really want to know the maximum acceptable > >> slew rate in timings... maybe that should be left for the drivers to > >> apply. What I'm thinking is that we have the page flip timestamp with > >> the page flip events to tell when the new FB became active. That > >> information could be extended with a time range on when the very next > >> flip could take place. Applications are already computing that > >> prediction from the flip timestamp and fixed refresh rate, but it might > >> be nice to give them the driver's opinion explicitly. Maybe the > >> tolerable slew rate is not a constant. > > > > Well it depends. I agree that the kernel should probably enforce the > > slew rate to avoid flickering for the user. That's what I was hoping if the monitor hardware does not do that already, but now it sounds like it's not possible. Another failed assumption from my side. > > But it might be beneficial for the application to know things like this. Applications should know when they could likely flip, my question is how to tell them. Is the acceptable slew rate a constant for a video mode, or does it depend on the previous refresh interval. > > > > What the application definitely needs to know is when a frame was > > actually displayed. E.g. application says I want to display this at > > point X and at some point later the kernel returns saying the frame was > > displayed at point N where in the ideal case N=X. You already get this timestamp with the DRM page flip events. We also have Wayland and X11 extensions to get it to apps. > > > > Additional to that let's please use 64bit values and nanoseconds for > > every value we use here, and NOT fps, line numbers or similar :) Seconded! I'd really favour absolute timestamps even. > > > > Regards, > > Christian. > > Flickering will really depend on the panel itself. A wider ranger is > more likely to exhibit the issue, but factors like topology, pixel > density and other display technology can affect this. > > It can be hard for a driver to guess at all of this. For many panels > it'd be difficult to notice
[PATCH v2] drm: Get ref on CRTC commit object when waiting for flip_done
From: Leo Li This fixes a general protection fault, caused by accessing the contents of a flip_done completion object that has already been freed. It occurs due to the preemption of a non-blocking commit worker thread W by another commit thread X. X continues to clear its atomic state at the end, destroying the CRTC commit object that W still needs. Switching back to W and accessing the commit objects then leads to bad results. Worker W becomes preemptable when waiting for flip_done to complete. At this point, a frequently occurring commit thread X can take over. Here's an example where W is a worker thread that flips on both CRTCs, and X does a legacy cursor update on both CRTCs: ... 1. W does flip work 2. W runs commit_hw_done() 3. W waits for flip_done on CRTC 1 4. > flip_done for CRTC 1 completes 5. W finishes waiting for CRTC 1 6. W waits for flip_done on CRTC 2 7. > Preempted by X 8. > flip_done for CRTC 2 completes 9. X atomic_check: hw_done and flip_done are complete on all CRTCs 10. X updates cursor on both CRTCs 11. X destroys atomic state 12. X done 13. > Switch back to W 14. W waits for flip_done on CRTC 2 15. W raises general protection fault The error looks like so: general protection fault: [#1] PREEMPT SMP PTI **snip** Call Trace: lock_acquire+0xa2/0x1b0 _raw_spin_lock_irq+0x39/0x70 wait_for_completion_timeout+0x31/0x130 drm_atomic_helper_wait_for_flip_done+0x64/0x90 [drm_kms_helper] amdgpu_dm_atomic_commit_tail+0xcae/0xdd0 [amdgpu] commit_tail+0x3d/0x70 [drm_kms_helper] process_one_work+0x212/0x650 worker_thread+0x49/0x420 kthread+0xfb/0x130 ret_from_fork+0x3a/0x50 Modules linked in: x86_pkg_temp_thermal amdgpu(O) chash(O) gpu_sched(O) drm_kms_helper(O) syscopyarea sysfillrect sysimgblt fb_sys_fops ttm(O) drm(O) Note that i915 has this issue masked, since hw_done is signaled after waiting for flip_done. Doing so will block the cursor update from happening until hw_done is signaled, preventing the cursor commit from destroying the state. v2: The reference on the commit object needs to be obtained before hw_done() is signaled, since that's the point where another commit is allowed to modify the state. Assuming that the new_crtc_state->commit object still exists within flip_done() is incorrect. Fix by getting a reference in setup_commit(), and releasing it during default_clear(). Signed-off-by: Leo Li --- Sending again, this time to the correct mailing list :) Leo drivers/gpu/drm/drm_atomic.c| 5 + drivers/gpu/drm/drm_atomic_helper.c | 12 include/drm/drm_atomic.h| 11 +++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 3eb061e..12ae917 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -174,6 +174,11 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->crtcs[i].state = NULL; state->crtcs[i].old_state = NULL; state->crtcs[i].new_state = NULL; + + if (state->crtcs[i].commit) { + drm_crtc_commit_put(state->crtcs[i].commit); + state->crtcs[i].commit = NULL; + } } for (i = 0; i < config->num_total_plane; i++) { diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 80be74d..1bb4c31 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1408,15 +1408,16 @@ EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks); void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev, struct drm_atomic_state *old_state) { - struct drm_crtc_state *new_crtc_state; struct drm_crtc *crtc; int i; - for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i) { - struct drm_crtc_commit *commit = new_crtc_state->commit; + for (i = 0; i < dev->mode_config.num_crtc; i++) { + struct drm_crtc_commit *commit = old_state->crtcs[i].commit; int ret; - if (!commit) + crtc = old_state->crtcs[i].ptr; + + if (!crtc || !commit) continue; ret = wait_for_completion_timeout(>flip_done, 10 * HZ); @@ -1934,6 +1935,9 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state, drm_crtc_commit_get(commit); commit->abort_completion = true; + + state->crtcs[i].commit = commit; + drm_crtc_commit_get(commit); } for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) { diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
Re: [RFC] drm/amd/display: add SI support to AMD DC
On Mon, Oct 15, 2018 at 07:28:57AM +0200, Mauro Rossi wrote: > dpm for SI is available, while powerplay for SI is not, but > display/amdgpu_dm uses some powerplay calls, where get_static_clock > functions not available and the *ERROR* DM_PPLIB is due to missing handling > in powerplay I though powerplay was just more power states for the dpm state machines to play with. I recall parsing ATOMBIOS pplay tables for records of (voltages/clocks frequencies). Maybe the future is to trust the OS kernel and perform all "low speed" 3D asic functions there and keep only high speed 3D operations in the asic. -- Sylvain ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v5 2/4] drm: Add vrr_enabled property to drm CRTC
Am 15.10.2018 um 12:06 schrieb Michel Dänzer: > [SNIP] > Apart from the above, another issue is that this would give direct > control to the client on whether or not VRR should be used. But we want > to allow the user to disable VRR even if a client wants to use it, via > an RandR output property. This requires that the Xorg driver can control > whether or not VRR can actually be used, via the CRTC property added by > this patch. Oh, that is a really good argument! No objection from my side to this then, Christian. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v5 2/4] drm: Add vrr_enabled property to drm CRTC
On 2018-10-15 11:47 a.m., Christian König wrote: > Am 15.10.2018 um 11:40 schrieb Michel Dänzer: >> On 2018-10-13 7:38 p.m., Christian König wrote: >>> Am 12.10.2018 um 18:44 schrieb Nicholas Kazlauskas: This patch introduces the 'vrr_enabled' CRTC property to allow dynamic control over variable refresh rate support for a CRTC. This property should be treated like a content hint to the driver - if the hardware or driver is not capable of driving variable refresh timings then this is not considered an error. Capability for variable refresh rate support should be determined by querying the vrr_capable drm connector property. It is worth noting that while the property is intended for atomic use it isn't filtered from legacy userspace queries. This allows for Xorg userspace drivers to implement support. >>> I'm honestly still not convinced that a per CRTC property is actually >>> the right approach. >>> >>> What we should rather do instead is to implement a target timestamp for >>> the page flip. >> You'd have to be more specific about how the latter could completely >> replace the former. I don't see how it could. > > Each flip request send by an application gets a timestamp when the flip > should be displayed. > > If I'm not completely mistaken we already have something like that in > both DRI2 as well as DRI3. Certainly not DRI2, but for now we're not enabling VRR with that anyway. While the X11 Present extension specifies PresentOptionUST for this, support for it isn't implemented yet in xserver (as in setting the option has no effect, the X server interprets the target value as an MSC anyway). So this couldn't work before the next major release of xserver, which based on recent history could be at least about one year away. In contrast, the CRTC property based solution for the gaming use-case can work even with older xserver versions. > So as far as I can see we only need to add an extra flag that those > information are trust worth in the context of VRR as well. > > If we don't set this flag we always get the always working fallback > behavior, e.g. VRR is disabled and we have a fixed refresh rate. > > If we set this flag and the timestamp is in the past we get the VRR > behavior to display the next frame as soon as possible. > > If we set this flag and specific a specific timestamp then we get the > VRR behavior to display the frame as close as possible to the specified > timestamp. Apart from the above, another issue is that this would give direct control to the client on whether or not VRR should be used. But we want to allow the user to disable VRR even if a client wants to use it, via an RandR output property. This requires that the Xorg driver can control whether or not VRR can actually be used, via the CRTC property added by this patch. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v5 2/4] drm: Add vrr_enabled property to drm CRTC
Am 15.10.2018 um 11:40 schrieb Michel Dänzer: On 2018-10-13 7:38 p.m., Christian König wrote: Am 12.10.2018 um 18:44 schrieb Nicholas Kazlauskas: This patch introduces the 'vrr_enabled' CRTC property to allow dynamic control over variable refresh rate support for a CRTC. This property should be treated like a content hint to the driver - if the hardware or driver is not capable of driving variable refresh timings then this is not considered an error. Capability for variable refresh rate support should be determined by querying the vrr_capable drm connector property. It is worth noting that while the property is intended for atomic use it isn't filtered from legacy userspace queries. This allows for Xorg userspace drivers to implement support. I'm honestly still not convinced that a per CRTC property is actually the right approach. What we should rather do instead is to implement a target timestamp for the page flip. You'd have to be more specific about how the latter could completely replace the former. I don't see how it could. Each flip request send by an application gets a timestamp when the flip should be displayed. If I'm not completely mistaken we already have something like that in both DRI2 as well as DRI3. So as far as I can see we only need to add an extra flag that those information are trust worth in the context of VRR as well. If we don't set this flag we always get the always working fallback behavior, e.g. VRR is disabled and we have a fixed refresh rate. If we set this flag and the timestamp is in the past we get the VRR behavior to display the next frame as soon as possible. If we set this flag and specific a specific timestamp then we get the VRR behavior to display the frame as close as possible to the specified timestamp. Regards, Christian. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v5 2/4] drm: Add vrr_enabled property to drm CRTC
On 2018-10-13 7:38 p.m., Christian König wrote: > Am 12.10.2018 um 18:44 schrieb Nicholas Kazlauskas: >> This patch introduces the 'vrr_enabled' CRTC property to allow >> dynamic control over variable refresh rate support for a CRTC. >> >> This property should be treated like a content hint to the driver - >> if the hardware or driver is not capable of driving variable refresh >> timings then this is not considered an error. >> >> Capability for variable refresh rate support should be determined >> by querying the vrr_capable drm connector property. >> >> It is worth noting that while the property is intended for atomic use >> it isn't filtered from legacy userspace queries. This allows for Xorg >> userspace drivers to implement support. > > I'm honestly still not convinced that a per CRTC property is actually > the right approach. > > What we should rather do instead is to implement a target timestamp for > the page flip. You'd have to be more specific about how the latter could completely replace the former. I don't see how it could. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 7/7] drm/amdgpu: update version for timeline syncobj support in amdgpu
I'm on sick leave today. But I will see what I can do later in the afternoon, Christian. Am 15.10.2018 um 11:01 schrieb Zhou, David(ChunMing): > Ping... > Christian, Could I get your RB on the series? And help me to push to drm-misc? > After that I can rebase libdrm header file based on drm-next. > > Thanks, > David Zhou > >> -Original Message- >> From: amd-gfx On Behalf Of >> Chunming Zhou >> Sent: Monday, October 15, 2018 4:56 PM >> To: dri-de...@lists.freedesktop.org >> Cc: Zhou, David(ChunMing) ; amd- >> g...@lists.freedesktop.org >> Subject: [PATCH 7/7] drm/amdgpu: update version for timeline syncobj >> support in amdgpu >> >> Signed-off-by: Chunming Zhou >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- >> 1 file changed, 2 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> index 6870909da926..58cba492ba55 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> @@ -70,9 +70,10 @@ >>* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). >>* - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. >>* - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. >> + * - 3.28.0 - Add syncobj timeline support to AMDGPU_CS. >>*/ >> #define KMS_DRIVER_MAJOR 3 >> -#define KMS_DRIVER_MINOR27 >> +#define KMS_DRIVER_MINOR28 >> #define KMS_DRIVER_PATCHLEVEL 0 >> >> int amdgpu_vram_limit = 0; >> -- >> 2.17.1 >> >> ___ >> amd-gfx mailing list >> amd-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 7/7] drm/amdgpu: update version for timeline syncobj support in amdgpu
Ping... Christian, Could I get your RB on the series? And help me to push to drm-misc? After that I can rebase libdrm header file based on drm-next. Thanks, David Zhou > -Original Message- > From: amd-gfx On Behalf Of > Chunming Zhou > Sent: Monday, October 15, 2018 4:56 PM > To: dri-de...@lists.freedesktop.org > Cc: Zhou, David(ChunMing) ; amd- > g...@lists.freedesktop.org > Subject: [PATCH 7/7] drm/amdgpu: update version for timeline syncobj > support in amdgpu > > Signed-off-by: Chunming Zhou > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index 6870909da926..58cba492ba55 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -70,9 +70,10 @@ > * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). > * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. > * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. > + * - 3.28.0 - Add syncobj timeline support to AMDGPU_CS. > */ > #define KMS_DRIVER_MAJOR 3 > -#define KMS_DRIVER_MINOR 27 > +#define KMS_DRIVER_MINOR 28 > #define KMS_DRIVER_PATCHLEVEL0 > > int amdgpu_vram_limit = 0; > -- > 2.17.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 7/7] drm/amdgpu: update version for timeline syncobj support in amdgpu
Signed-off-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6870909da926..58cba492ba55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -70,9 +70,10 @@ * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. + * - 3.28.0 - Add syncobj timeline support to AMDGPU_CS. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 27 +#define KMS_DRIVER_MINOR 28 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/7] drm: add timeline syncobj payload query ioctl v2
user mode can query timeline payload. v2: check return value of copy_to_user Signed-off-by: Chunming Zhou --- drivers/gpu/drm/drm_internal.h | 2 ++ drivers/gpu/drm/drm_ioctl.c| 2 ++ drivers/gpu/drm/drm_syncobj.c | 52 ++ include/uapi/drm/drm.h | 11 +++ 4 files changed, 67 insertions(+) diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 566d44e3c782..9c4826411a3c 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -189,6 +189,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); /* drm_framebuffer.c */ void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent, diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index c0891614f516..c3c0617e6372 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -675,6 +675,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED), diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index f3f11ac2ef28..17124d40532f 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1304,3 +1304,55 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, return ret; } + +static void drm_syncobj_timeline_query_payload(struct drm_syncobj *syncobj, + uint64_t *point) +{ + if (syncobj->type != DRM_SYNCOBJ_TYPE_TIMELINE) { + DRM_ERROR("Normal syncobj cann't be queried!"); + *point = 0; + return; + } + *point = syncobj->signal_point; +} + +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_timeline_query *args = data; + struct drm_syncobj **syncobjs; + uint64_t *points; + uint32_t i; + int ret; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->count_handles == 0) + return -EINVAL; + + ret = drm_syncobj_array_find(file_private, +u64_to_user_ptr(args->handles), +args->count_handles, +); + if (ret < 0) + return ret; + + + points = kmalloc_array(args->count_handles, sizeof(*points), + GFP_KERNEL); + if (points == NULL) { + ret = -ENOMEM; + goto out; + } + for (i = 0; i < args->count_handles; i++) + drm_syncobj_timeline_query_payload(syncobjs[i], [i]); + ret = copy_to_user(u64_to_user_ptr(args->points), points, + sizeof(uint64_t) * args->count_handles) ? -EFAULT : 0; + + kfree(points); +out: + drm_syncobj_array_free(syncobjs, args->count_handles); + + return ret; +} diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index c8bc1414753d..23c4979d8a1c 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -771,6 +771,15 @@ struct drm_syncobj_array { __u32 pad; }; +struct drm_syncobj_timeline_query { + __u64 handles; + /* points are timeline syncobjs payloads returned by query ioctl */ + __u64 points; + __u32 count_handles; + __u32 pad; +}; + + /* Query current scanout sequence number */ struct drm_crtc_get_sequence { __u32 crtc_id; /* requested crtc_id */ @@ -928,6 +937,8 @@ extern "C" { #define DRM_IOCTL_MODE_REVOKE_LEASEDRM_IOWR(0xC9, struct drm_mode_revoke_lease) #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAITDRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) +#define DRM_IOCTL_SYNCOBJ_QUERYDRM_IOWR(0xCB, struct drm_syncobj_timeline_query) + /** * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. -- 2.17.1 ___ amd-gfx mailing
[PATCH 6/7] drm/amdgpu: add timeline support in amdgpu CS v2
syncobj wait/signal operation is appending in command submission. v2: separate to two kinds in/out_deps functions Signed-off-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 111 + include/uapi/drm/amdgpu_drm.h | 9 ++ 3 files changed, 112 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 447c4c7a36d6..6e4a3db56833 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -975,6 +975,11 @@ struct amdgpu_cs_chunk { void*kdata; }; +struct amdgpu_cs_syncobj_post_dep { + struct drm_syncobj *post_dep_syncobj; + u64 point; +}; + struct amdgpu_cs_parser { struct amdgpu_device*adev; struct drm_file *filp; @@ -1003,9 +1008,8 @@ struct amdgpu_cs_parser { /* user fence */ struct amdgpu_bo_list_entry uf_entry; - + struct amdgpu_cs_syncobj_post_dep *post_dep_syncobjs; unsigned num_post_dep_syncobjs; - struct drm_syncobj **post_dep_syncobjs; }; static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 412fac238575..7429e7941f4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -204,6 +204,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs case AMDGPU_CHUNK_ID_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: break; default: @@ -783,7 +785,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, >validated); for (i = 0; i < parser->num_post_dep_syncobjs; i++) - drm_syncobj_put(parser->post_dep_syncobjs[i]); + drm_syncobj_put(parser->post_dep_syncobjs[i].post_dep_syncobj); kfree(parser->post_dep_syncobjs); dma_fence_put(parser->fence); @@ -1098,13 +1100,17 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, } static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, -uint32_t handle) +uint32_t handle, u64 point, +u64 flags) { int r; struct dma_fence *fence; - r = drm_syncobj_find_fence(p->filp, handle, 0, 0, ); - if (r) + + r = drm_syncobj_find_fence(p->filp, handle, point, flags, ); + if (r) { + DRM_ERROR("syncobj %u failed to find fence!\n", handle); return r; + } r = amdgpu_sync_fence(p->adev, >job->sync, fence, true); dma_fence_put(fence); @@ -1115,46 +1121,108 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { + struct drm_amdgpu_cs_chunk_sem *deps; unsigned num_deps; int i, r; - struct drm_amdgpu_cs_chunk_sem *deps; deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, + 0, 0); + if (r) + return r; + } + + return 0; +} + + +static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, +struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i, r; + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle); + r = amdgpu_syncobj_lookup_and_add_to_sync(p, + syncobj_deps[i].handle, + syncobj_deps[i].point, + syncobj_deps[i].flags); if (r) return r; } + return 0; } static int amdgpu_cs_process_syncobj_out_dep(struct
[PATCH 5/7] drm: add timeline support for syncobj export/import
user space can specify timeline point fence to export/import. Signed-off-by: Chunming Zhou --- drivers/gpu/drm/drm_internal.h | 4 ++ drivers/gpu/drm/drm_ioctl.c| 4 ++ drivers/gpu/drm/drm_syncobj.c | 76 ++ include/uapi/drm/drm.h | 11 + 4 files changed, 88 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 9c4826411a3c..5ad6cbdb68ab 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -181,6 +181,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_handle_to_fd_ioctl2(struct drm_device *dev, void *data, + struct drm_file *file_private); +int drm_syncobj_fd_to_handle_ioctl2(struct drm_device *dev, void *data, + struct drm_file *file_private); int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index c3c0617e6372..b80c2c28aee0 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -667,6 +667,10 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD2, drm_syncobj_handle_to_fd_ioctl2, + DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE2, drm_syncobj_fd_to_handle_ioctl2, + DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl, diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 17124d40532f..aed492570d85 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -683,7 +683,7 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private, } static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private, - int fd, int handle) + int fd, int handle, uint64_t point) { struct dma_fence *fence = sync_file_get_fence(fd); struct drm_syncobj *syncobj; @@ -697,14 +697,14 @@ static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private, return -ENOENT; } - drm_syncobj_replace_fence(syncobj, 0, fence); + drm_syncobj_replace_fence(syncobj, point, fence); dma_fence_put(fence); drm_syncobj_put(syncobj); return 0; } static int drm_syncobj_export_sync_file(struct drm_file *file_private, - int handle, int *p_fd) + int handle, uint64_t point, int *p_fd) { int ret; struct dma_fence *fence; @@ -714,7 +714,7 @@ static int drm_syncobj_export_sync_file(struct drm_file *file_private, if (fd < 0) return fd; - ret = drm_syncobj_find_fence(file_private, handle, 0, 0, ); + ret = drm_syncobj_find_fence(file_private, handle, point, 0, ); if (ret) goto err_put_fd; @@ -823,9 +823,14 @@ drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data, args->flags != DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE) return -EINVAL; - if (args->flags & DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE) + if (args->flags & DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE) { + struct drm_syncobj *syncobj = drm_syncobj_find(file_private, + args->handle); + if (!syncobj || syncobj->type != DRM_SYNCOBJ_TYPE_BINARY) + return -EINVAL; return drm_syncobj_export_sync_file(file_private, args->handle, - >fd); + 0, >fd); + } return drm_syncobj_handle_to_fd(file_private, args->handle, >fd); @@ -837,6 +842,61 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, { struct drm_syncobj_handle *args = data; + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if
[PATCH 3/7] drm: add support of syncobj timeline point wait v2
points array is one-to-one match with syncobjs array. v2: add seperate ioctl for timeline point wait, otherwise break uapi. v3: userspace can specify two kinds waits:: a. Wait for time point to be completed. b. and wait for time point to become available Signed-off-by: Chunming Zhou --- drivers/gpu/drm/drm_internal.h | 2 + drivers/gpu/drm/drm_ioctl.c| 2 + drivers/gpu/drm/drm_syncobj.c | 118 - include/uapi/drm/drm.h | 18 + 4 files changed, 124 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 0c4eb4a9ab31..566d44e3c782 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -183,6 +183,8 @@ int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 6b4a633b4240..c0891614f516 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -669,6 +669,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 67472bd77c83..f3f11ac2ef28 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -126,13 +126,14 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, } static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, +u64 point, struct dma_fence **fence, struct drm_syncobj_cb *cb, drm_syncobj_func_t func) { int ret; - ret = drm_syncobj_search_fence(syncobj, 0, 0, fence); + ret = drm_syncobj_search_fence(syncobj, point, 0, fence); if (!ret) return 1; @@ -143,7 +144,7 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, */ if (!list_empty(>signal_pt_list)) { spin_unlock(>lock); - drm_syncobj_search_fence(syncobj, 0, 0, fence); + drm_syncobj_search_fence(syncobj, point, 0, fence); if (*fence) return 1; spin_lock(>lock); @@ -354,13 +355,17 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, drm_syncobj_create_signal_pt(syncobj, fence, pt_value); if (fence) { struct drm_syncobj_cb *cur, *tmp; + struct list_head cb_list; + + INIT_LIST_HEAD(_list); spin_lock(>lock); - list_for_each_entry_safe(cur, tmp, >cb_list, node) { + list_splice_init(>cb_list, _list); + spin_unlock(>lock); + list_for_each_entry_safe(cur, tmp, _list, node) { list_del_init(>node); cur->func(syncobj, cur); } - spin_unlock(>lock); } } EXPORT_SYMBOL(drm_syncobj_replace_fence); @@ -856,6 +861,7 @@ struct syncobj_wait_entry { struct dma_fence *fence; struct dma_fence_cb fence_cb; struct drm_syncobj_cb syncobj_cb; + u64point; }; static void syncobj_wait_fence_func(struct dma_fence *fence, @@ -873,12 +879,13 @@ static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, struct syncobj_wait_entry *wait = container_of(cb, struct syncobj_wait_entry, syncobj_cb); - drm_syncobj_search_fence(syncobj, 0, 0, >fence); + drm_syncobj_search_fence(syncobj, wait->point, 0, >fence); wake_up_process(wait->task); } static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, + void __user *user_points, uint32_t count,
[PATCH 2/7] drm: add syncobj timeline support v8
This patch is for VK_KHR_timeline_semaphore extension, semaphore is called syncobj in kernel side: This extension introduces a new type of syncobj that has an integer payload identifying a point in a timeline. Such timeline syncobjs support the following operations: * CPU query - A host operation that allows querying the payload of the timeline syncobj. * CPU wait - A host operation that allows a blocking wait for a timeline syncobj to reach a specified value. * Device wait - A device operation that allows waiting for a timeline syncobj to reach a specified value. * Device signal - A device operation that allows advancing the timeline syncobj to a specified value. v1: Since it's a timeline, that means the front time point(PT) always is signaled before the late PT. a. signal PT design: Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled, the timeline will increase to value of PT[N]. b. wait PT design: Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be signaled, otherwise keep in list. syncobj wait operation can wait on any point of timeline, so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to perform that. v2: 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian) 2. move unexposed denitions to .c file. (Daniel Vetter) 3. split up the change to drm_syncobj_find_fence() in a separate patch. (Christian) 4. split up the change to drm_syncobj_replace_fence() in a separate patch. 5. drop the submission_fence implementation and instead use wait_event() for that. (Christian) 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter) v3: 1. replace normal syncobj with timeline implemenation. (Vetter and Christian) a. normal syncobj signal op will create a signal PT to tail of signal pt list. b. normal syncobj wait op will create a wait pt with last signal point, and this wait PT is only signaled by related signal point PT. 2. many bug fix and clean up 3. stub fence moving is moved to other patch. v4: 1. fix RB tree loop with while(node=rb_first(...)). (Christian) 2. fix syncobj lifecycle. (Christian) 3. only enable_signaling when there is wait_pt. (Christian) 4. fix timeline path issues. 5. write a timeline test in libdrm v5: (Christian) 1. semaphore is called syncobj in kernel side. 2. don't need 'timeline' characters in some function name. 3. keep syncobj cb. v6: (Christian) 1. merge syncobj_timeline to syncobj structure. 2. simplify some check sentences. 3. some misc change. 4. fix CTS failed issue. v7: (Christian) 1. error handling when creating signal pt. 2. remove timeline naming in func. 3. export flags in find_fence. 4. allow reset timeline. v8: 1. use wait_event_interruptible without timeout 2. rename _TYPE_INDIVIDUAL to _TYPE_BINARY individual syncobj is tested by ./deqp-vk -n dEQP-VK*semaphore* timeline syncobj is tested by ./amdgpu_test -s 9 Signed-off-by: Chunming Zhou Cc: Christian Konig Cc: Dave Airlie Cc: Daniel Rakos Cc: Daniel Vetter Reviewed-by: Christian König --- drivers/gpu/drm/drm_syncobj.c | 287 ++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- include/drm/drm_syncobj.h | 65 ++--- include/uapi/drm/drm.h | 1 + 4 files changed, 281 insertions(+), 74 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index f796c9fc3858..67472bd77c83 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -56,6 +56,9 @@ #include "drm_internal.h" #include +/* merge normal syncobj to timeline syncobj, the point interval is 1 */ +#define DRM_SYNCOBJ_BINARY_POINT 1 + struct drm_syncobj_stub_fence { struct dma_fence base; spinlock_t lock; @@ -82,6 +85,11 @@ static const struct dma_fence_ops drm_syncobj_stub_fence_ops = { .release = drm_syncobj_stub_fence_release, }; +struct drm_syncobj_signal_pt { + struct dma_fence_array *base; + u64value; + struct list_head list; +}; /** * drm_syncobj_find - lookup and reference a sync object. @@ -124,8 +132,8 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, { int ret; - *fence = drm_syncobj_fence_get(syncobj); - if (*fence) + ret = drm_syncobj_search_fence(syncobj, 0, 0, fence); + if (!ret) return 1; spin_lock(>lock); @@ -133,10 +141,12 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, * have the lock, try one more time just to be sure we don't add a * callback when a fence has already been set. */ - if (syncobj->fence) { - *fence =
[PATCH 1/7] drm: add flags to drm_syncobj_find_fence
flags can be used by driver to decide whether need to block wait submission. Signed-off-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/drm_syncobj.c | 4 ++-- drivers/gpu/drm/v3d/v3d_gem.c | 4 ++-- drivers/gpu/drm/vc4/vc4_gem.c | 2 +- include/drm/drm_syncobj.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d9d2ede96490..412fac238575 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1102,7 +1102,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, { int r; struct dma_fence *fence; - r = drm_syncobj_find_fence(p->filp, handle, 0, ); + r = drm_syncobj_find_fence(p->filp, handle, 0, 0, ); if (r) return r; diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index e9ce623d049e..f796c9fc3858 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -235,7 +235,7 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) * dma_fence_put(). */ int drm_syncobj_find_fence(struct drm_file *file_private, - u32 handle, u64 point, + u32 handle, u64 point, u64 flags, struct dma_fence **fence) { struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle); @@ -506,7 +506,7 @@ static int drm_syncobj_export_sync_file(struct drm_file *file_private, if (fd < 0) return fd; - ret = drm_syncobj_find_fence(file_private, handle, 0, ); + ret = drm_syncobj_find_fence(file_private, handle, 0, 0, ); if (ret) goto err_put_fd; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 70c54774400b..97477879d3d4 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -521,12 +521,12 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, kref_init(>refcount); ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl, -0, >bin.in_fence); +0, 0, >bin.in_fence); if (ret == -EINVAL) goto fail; ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl, -0, >render.in_fence); +0, 0, >render.in_fence); if (ret == -EINVAL) goto fail; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 5b22e996af6c..251198194c38 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -1173,7 +1173,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (args->in_sync) { ret = drm_syncobj_find_fence(file_priv, args->in_sync, -0, _fence); +0, 0, _fence); if (ret) goto fail; diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index 425432b85a87..2eda44def639 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -134,7 +134,7 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, u64 point, struct dma_fence *fence); int drm_syncobj_find_fence(struct drm_file *file_private, - u32 handle, u64 point, + u32 handle, u64 point, u64 flags, struct dma_fence **fence); void drm_syncobj_free(struct kref *kref); int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx