[PATCH] drm/amd/pp: Add #ifdef checks for CONFIG_ACPI
Fix compiling error when CONFIG_ACPI not enabled. Change-Id: I20d3f55bbd2ad68e65363cde7452802b063643f0 Signed-off-by: Rex Zhu--- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 2 ++ drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 8a37c80..aa83114 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -902,8 +902,10 @@ void hwmgr_init_default_caps(struct pp_hwmgr *hwmgr) phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_UVDDPM); phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_VCEDPM); +#if defined(CONFIG_ACPI) if (amdgpu_acpi_is_pcie_performance_request_supported(hwmgr->adev)) phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PCIEPerformanceRequest); +#endif phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DynamicPatchPowerState); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 62ebe15..4f26014 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -3596,6 +3596,7 @@ static int smu7_request_link_speed_change_before_state_change( if (target_link_speed > current_link_speed) { switch (target_link_speed) { +#ifdef CONFIG_ACPI case PP_PCIEGen3: if (0 == amdgpu_acpi_pcie_performance_request(hwmgr->adev, PCIE_PERF_REQ_GEN3, false)) break; @@ -3605,6 +3606,7 @@ static int smu7_request_link_speed_change_before_state_change( case PP_PCIEGen2: if (0 == amdgpu_acpi_pcie_performance_request(hwmgr->adev, PCIE_PERF_REQ_GEN2, false)) break; +#endif default: data->force_pcie_gen = smu7_get_current_pcie_speed(hwmgr); break; -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/pp: Drop wrapper functions for upper/lower_32_bits
On Tue, Mar 6, 2018 at 12:46 AM, Rex Zhuwrote: > replace smu_upper_32_bits/smu_lower_32_bits with > the standard kernel macros > > Change-Id: I89dec914c716d6dd74f39f3eb63d875f750d460e > Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/powerplay/inc/smumgr.h | 5 - > drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c | 24 > +++--- > drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c | 8 > drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c | 12 +-- > .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 12 +-- > 5 files changed, 28 insertions(+), 33 deletions(-) > > diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h > b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h > index 8872c5c..9bba0a0 100644 > --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h > +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h > @@ -26,11 +26,6 @@ > #include "amd_powerplay.h" > #include "hwmgr.h" > > -#define smu_lower_32_bits(n) ((uint32_t)(n)) > -#define smu_upper_32_bits(n) ((uint32_t)(((n)>>16)>>16)) > - > - > - > enum AVFS_BTC_STATUS { > AVFS_BTC_BOOT = 0, > AVFS_BTC_BOOT_STARTEDSMU, > diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c > b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c > index df58539..957739a 100644 > --- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c > +++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c > @@ -204,11 +204,11 @@ static int cz_load_mec_firmware(struct pp_hwmgr *hwmgr) > tmp = PHM_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, MTYPE, 1); > cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_CNTL, tmp); > > - reg_data = smu_lower_32_bits(info.mc_addr) & > + reg_data = lower_32_bits(info.mc_addr) & > PHM_FIELD_MASK(CP_CPC_IC_BASE_LO, IC_BASE_LO); > cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_LO, reg_data); > > - reg_data = smu_upper_32_bits(info.mc_addr) & > + reg_data = upper_32_bits(info.mc_addr) & > PHM_FIELD_MASK(CP_CPC_IC_BASE_HI, IC_BASE_HI); > cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_HI, reg_data); > > @@ 
-347,8 +347,8 @@ static int cz_smu_populate_single_scratch_task( > return -EINVAL; > } > > - task->addr.low = smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr); > - task->addr.high = > smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr); > + task->addr.low = lower_32_bits(cz_smu->scratch_buffer[i].mc_addr); > + task->addr.high = upper_32_bits(cz_smu->scratch_buffer[i].mc_addr); > task->size_bytes = cz_smu->scratch_buffer[i].data_size; > > if (CZ_SCRATCH_ENTRY_DATA_ID_IH_REGISTERS == fw_enum) { > @@ -384,8 +384,8 @@ static int cz_smu_populate_single_ucode_load_task( > return -EINVAL; > } > > - task->addr.low = smu_lower_32_bits(cz_smu->driver_buffer[i].mc_addr); > - task->addr.high = smu_upper_32_bits(cz_smu->driver_buffer[i].mc_addr); > + task->addr.low = lower_32_bits(cz_smu->driver_buffer[i].mc_addr); > + task->addr.high = upper_32_bits(cz_smu->driver_buffer[i].mc_addr); > task->size_bytes = cz_smu->driver_buffer[i].data_size; > > return 0; > @@ -613,11 +613,11 @@ static int cz_download_pptable_settings(struct pp_hwmgr > *hwmgr, void **table) > > cz_send_msg_to_smc_with_parameter(hwmgr, > PPSMC_MSG_SetClkTableAddrHi, > - > smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr)); > + > upper_32_bits(cz_smu->scratch_buffer[i].mc_addr)); > > cz_send_msg_to_smc_with_parameter(hwmgr, > PPSMC_MSG_SetClkTableAddrLo, > - > smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr)); > + > lower_32_bits(cz_smu->scratch_buffer[i].mc_addr)); > > cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob, > cz_smu->toc_entry_clock_table); > @@ -640,11 +640,11 @@ static int cz_upload_pptable_settings(struct pp_hwmgr > *hwmgr) > > cz_send_msg_to_smc_with_parameter(hwmgr, > PPSMC_MSG_SetClkTableAddrHi, > - > smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr)); > + > upper_32_bits(cz_smu->scratch_buffer[i].mc_addr)); > > cz_send_msg_to_smc_with_parameter(hwmgr, > PPSMC_MSG_SetClkTableAddrLo, > - > smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr)); > + > 
lower_32_bits(cz_smu->scratch_buffer[i].mc_addr)); > > cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob, >
[PATCH] drm/amd/pp: Drop wrapper functions for upper/lower_32_bits
replace smu_upper_32_bits/smu_lower_32_bits with the standard kernel macros Change-Id: I89dec914c716d6dd74f39f3eb63d875f750d460e Signed-off-by: Rex Zhu--- drivers/gpu/drm/amd/powerplay/inc/smumgr.h | 5 - drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c | 24 +++--- drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c | 8 drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c | 12 +-- .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 12 +-- 5 files changed, 28 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index 8872c5c..9bba0a0 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -26,11 +26,6 @@ #include "amd_powerplay.h" #include "hwmgr.h" -#define smu_lower_32_bits(n) ((uint32_t)(n)) -#define smu_upper_32_bits(n) ((uint32_t)(((n)>>16)>>16)) - - - enum AVFS_BTC_STATUS { AVFS_BTC_BOOT = 0, AVFS_BTC_BOOT_STARTEDSMU, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c index df58539..957739a 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c @@ -204,11 +204,11 @@ static int cz_load_mec_firmware(struct pp_hwmgr *hwmgr) tmp = PHM_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, MTYPE, 1); cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_CNTL, tmp); - reg_data = smu_lower_32_bits(info.mc_addr) & + reg_data = lower_32_bits(info.mc_addr) & PHM_FIELD_MASK(CP_CPC_IC_BASE_LO, IC_BASE_LO); cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_LO, reg_data); - reg_data = smu_upper_32_bits(info.mc_addr) & + reg_data = upper_32_bits(info.mc_addr) & PHM_FIELD_MASK(CP_CPC_IC_BASE_HI, IC_BASE_HI); cgs_write_register(hwmgr->device, mmCP_CPC_IC_BASE_HI, reg_data); @@ -347,8 +347,8 @@ static int cz_smu_populate_single_scratch_task( return -EINVAL; } - task->addr.low = smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr); - task->addr.high = 
smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr); + task->addr.low = lower_32_bits(cz_smu->scratch_buffer[i].mc_addr); + task->addr.high = upper_32_bits(cz_smu->scratch_buffer[i].mc_addr); task->size_bytes = cz_smu->scratch_buffer[i].data_size; if (CZ_SCRATCH_ENTRY_DATA_ID_IH_REGISTERS == fw_enum) { @@ -384,8 +384,8 @@ static int cz_smu_populate_single_ucode_load_task( return -EINVAL; } - task->addr.low = smu_lower_32_bits(cz_smu->driver_buffer[i].mc_addr); - task->addr.high = smu_upper_32_bits(cz_smu->driver_buffer[i].mc_addr); + task->addr.low = lower_32_bits(cz_smu->driver_buffer[i].mc_addr); + task->addr.high = upper_32_bits(cz_smu->driver_buffer[i].mc_addr); task->size_bytes = cz_smu->driver_buffer[i].data_size; return 0; @@ -613,11 +613,11 @@ static int cz_download_pptable_settings(struct pp_hwmgr *hwmgr, void **table) cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetClkTableAddrHi, - smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr)); + upper_32_bits(cz_smu->scratch_buffer[i].mc_addr)); cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetClkTableAddrLo, - smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr)); + lower_32_bits(cz_smu->scratch_buffer[i].mc_addr)); cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob, cz_smu->toc_entry_clock_table); @@ -640,11 +640,11 @@ static int cz_upload_pptable_settings(struct pp_hwmgr *hwmgr) cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetClkTableAddrHi, - smu_upper_32_bits(cz_smu->scratch_buffer[i].mc_addr)); + upper_32_bits(cz_smu->scratch_buffer[i].mc_addr)); cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetClkTableAddrLo, - smu_lower_32_bits(cz_smu->scratch_buffer[i].mc_addr)); + lower_32_bits(cz_smu->scratch_buffer[i].mc_addr)); cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob, cz_smu->toc_entry_clock_table); @@ -675,11 +675,11 @@ static int cz_request_smu_load_fw(struct pp_hwmgr *hwmgr) cz_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_DriverDramAddrHi, - 
smu_upper_32_bits(cz_smu->toc_buffer.mc_addr)); +
[PATCH 2/4] drm/amdgpu: refactor mailbox to fix TDR handshake bugs
this patch actually refactor mailbox implmentations, and all below changes are needed together to fix all those mailbox handshake issues exposured by heavey TDR test. 1)refactor all mailbox functions based on byte accessing for mb_control reason is to avoid touching non-related bits when writing trn/rcv part of mailbox_control, this way some incorrect INTR sent to hypervisor side could be avoided, and it fixes couple handshake bug. 2)trans_msg function re-impled: put a invalid logic before transmitting message to make sure the ACK bit is in a clear status, otherwise there is chance that ACK asserted already before transmitting message and lead to fake ACK polling. (hypervisor side have some tricks to workaround ACK bit being corrupted by VF FLR which hase an side effects that may make guest side ACK bit asserted wrongly), and clear TRANS_MSG words after message transferred. 3)for mailbox_flr_work, it is also re-worked: it takes the mutex lock first if invoked, to block gpu recover's participate too early while hypervisor side is doing VF FLR. (hypervisor sends FLR_NOTIFY to guest before doing VF FLR and sentds FLR_COMPLETE after VF FLR done, and the FLR_NOTIFY will trigger interrupt to guest which lead to mailbox_flr_work being invoked) This can avoid the issue that mailbox trans msg being cleared by its VF FLR. 4)for mailbox_rcv_irq IRQ routine, it should only peek msg and schedule mailbox_flr_work, instead of ACK to hypervisor itself, because FLR_NOTIFY msg sent from hypervisor side doesn't need VF's ACK (this is because VF's ACK would lead to hypervisor clear its trans_valid/msg, and this would cause handshake bug if trans_valid/msg is cleared not due to correct VF ACK but from a wrong VF ACK like this "FLR_NOTIFY" one) This fixed handshake bug that sometimes GUEST always couldn't receive "READY_TO_ACCESS_GPU" msg from hypervisor. 
5)seperate polling time limite accordingly: POLL ACK cost no more than 500ms POLL MSG cost no more than 12000ms POLL FLR finish cost no more than 500ms 6) we still need to set adev into in_gpu_reset mode after we received FLR_NOTIFY from host side, this can prevent innocent app wrongly succesed to open amdgpu dri device. FLR_NOFITY is received due to an IDLE hang detected from hypervisor side which indicating GPU is already die in this VF. Change-Id: I17df8b4490a5b53a1cc2bd6c8f9bc3ee14c23f1a Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 200 ++ drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 4 +- 2 files changed, 111 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 271452d..8d09380 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -33,56 +33,42 @@ static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev) { - u32 reg; - int timeout = AI_MAILBOX_TIMEDOUT; - u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID); - - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, -mmBIF_BX_PF0_MAILBOX_CONTROL)); - reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1); - WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_CONTROL), reg); - - /*Wait for RCV_MSG_VALID to be 0*/ - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, -mmBIF_BX_PF0_MAILBOX_CONTROL)); - while (reg & mask) { - if (timeout <= 0) { - pr_err("RCV_MSG_VALID is not cleared\n"); - break; - } - mdelay(1); - timeout -=1; - - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_CONTROL)); - } + const u32 offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1; + WREG8(offset, 2); } static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val) { - u32 reg; + const u32 offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4; + WREG8(offset, val ? 
1 : 0); +} - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, -mmBIF_BX_PF0_MAILBOX_CONTROL)); - reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, - TRN_MSG_VALID, val ? 1 : 0); - WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL), - reg); +/* + * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine + * RCV_MSG_VALID filed of BIF_BX_PF0_MAILBOX_CONTROL must already be set to 1 + * by host. + * + * if called no in IRQ routine, this peek_msg cannot guaranteed to return the + * correct value since it doesn't return the RCV_DW0 under the case that + *
[PATCH 4/4] dma-buf/reservation: should keep the new fence in add_shared_inplace
Change-Id: If6a979ba9fd6c923b82212f35f07a9ff31c86767 Signed-off-by: Monk Liu--- drivers/dma-buf/reservation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 314eb10..29b7e45 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -118,7 +118,7 @@ reservation_object_add_shared_inplace(struct reservation_object *obj, old_fence = rcu_dereference_protected(fobj->shared[i], reservation_object_held(obj)); - if (old_fence->context == fence->context) { + if (dma_fence_is_later(fence, old_fence)) { /* memory barrier is added by write_seqcount_begin */ RCU_INIT_POINTER(fobj->shared[i], fence); write_seqcount_end(>seq); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/4] drm/amdgpu: give warning before sleep in kiq_r/wreg
to catch error that may schedule in atomic context early on Change-Id: I49dec7c55470011729b7fa7d3e1ecfe1f38ed89f Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 42c1401..21adb1b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -167,6 +167,9 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_read; + if (in_interrupt()) + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); @@ -212,7 +215,11 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_write; + if (in_interrupt()) + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/4] drm/amdgpu: implement mmio byte access helpers
mailbox register can be accessed with a byte boundry according to BIF team, so this patch prepares register byte access and will be used by following patches Change-Id: I1e84f1c6e8e75dc42eb5be09c492fa5e7eb7502a Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 6 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 ++ 2 files changed, 32 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 292c7e7..72385bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1635,6 +1635,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); +void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); +uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset); + u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg); void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); @@ -1658,6 +1661,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) +#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) +#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) + #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) #define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX) #define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 65584f6..c8e1940 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -121,6 +121,32 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, return ret; } +/* + * MMIO register read with bytes helper functions + * 
@offset:bytes offset from MMIO start + * +*/ + +uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) { + if (offset < adev->rmmio_size) + return (readb(adev->rmmio + offset)); + BUG(); +} + +/* + * MMIO register write with bytes helper functions + * @offset:bytes offset from MMIO start + * @value: the value want to be written to the register + * +*/ +void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) { + if (offset < adev->rmmio_size) + writeb(value, adev->rmmio + offset); + else + BUG(); +} + + void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) { -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: Disable sdma wptr polling memory for sriov
Acked-by: Alex DeucherFrom: amd-gfx on behalf of Emily Deng Sent: Monday, March 5, 2018 9:14 PM To: amd-gfx@lists.freedesktop.org Cc: Deng, Emily Subject: [PATCH] drm/amdgpu: Disable sdma wptr polling memory for sriov The sdma wptr polling memory will introduce serious issue sdma hang for sriov environment on sdma v3. And the sdma wptr polling memory is only to fix the FLR cornner case, the issue's probabity is very low. Change-Id: I2c447533aac6b16d541f58644d141228dd75dfb3 Signed-off-by: Emily Deng --- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index ec885ff..44d7d08 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -716,10 +716,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i], upper_32_bits(wptr_gpu_addr)); wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); - if (amdgpu_sriov_vf(adev)) - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); - else - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl); /* enable DMA RB */ -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for VF"
Acked-by: Alex DeucherFrom: amd-gfx on behalf of Emily Deng Sent: Thursday, March 1, 2018 10:31 PM To: amd-gfx@lists.freedesktop.org Cc: Deng, Emily Subject: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for VF" This reverts commit 2ffe31deb27579e2f2c9444e01f4d8abf385d145. The sdma wptr poll memomy doesn't have the same efficiency as doorbell, and it will make sdma hang when running tests. Change-Id: I6e334430b309b0c21aa18a08764320c7ff51e353 Signed-off-by: Emily Deng --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 - drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 27 --- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 102dad3..5dcf98b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -186,7 +186,6 @@ struct amdgpu_ring { uint64_teop_gpu_addr; u32 doorbell_index; booluse_doorbell; - booluse_pollmem; unsignedwptr_offs; unsignedfence_offs; uint64_tcurrent_ctx; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 521978c..d3fb3ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 wptr; - if (ring->use_doorbell || ring->use_pollmem) { + if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2; } else { @@ -380,13 +380,10 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { u32 *wb = (u32 *)>wb.wb[ring->wptr_offs]; + /* XXX check if swapping is necessary on BE */ WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); - } else if (ring->use_pollmem) { - u32 *wb = (u32 *)>wb.wb[ring->wptr_offs]; - - WRITE_ONCE(*wb, 
(lower_32_bits(ring->wptr) << 2)); } else { int me = (ring == >adev->sdma.instance[0].ring) ? 0 : 1; @@ -719,14 +716,10 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i], upper_32_bits(wptr_gpu_addr)); wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); - if (ring->use_pollmem) - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, - SDMA0_GFX_RB_WPTR_POLL_CNTL, - ENABLE, 1); + if (amdgpu_sriov_vf(adev)) + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); else - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, - SDMA0_GFX_RB_WPTR_POLL_CNTL, - ENABLE, 0); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl); /* enable DMA RB */ @@ -1208,13 +1201,9 @@ static int sdma_v3_0_sw_init(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) { ring = >sdma.instance[i].ring; ring->ring_obj = NULL; - if (!amdgpu_sriov_vf(adev)) { - ring->use_doorbell = true; - ring->doorbell_index = (i == 0) ? - AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; - } else { - ring->use_pollmem = true; - } + ring->use_doorbell = true; + ring->doorbell_index = (i == 0) ? + AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list
[PATCH] drm/amdgpu: Disable sdma wptr polling memory for sriov
The sdma wptr polling memory will introduce serious issue sdma hang for sriov environment on sdma v3. And the sdma wptr polling memory is only to fix the FLR cornner case, the issue's probabity is very low. Change-Id: I2c447533aac6b16d541f58644d141228dd75dfb3 Signed-off-by: Emily Deng--- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index ec885ff..44d7d08 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -716,10 +716,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i], upper_32_bits(wptr_gpu_addr)); wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); - if (amdgpu_sriov_vf(adev)) - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); - else - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl); /* enable DMA RB */ -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for VF"
Ping Hi all, Please help review this, the regression introduced by the commit 2ffe31deb27579e2f2c9444e01f4d8abf385d145 is gating the sriov sanity test. Best Wishes, Emily Deng > -Original Message- > From: Emily Deng [mailto:emily.d...@amd.com] > Sent: Friday, March 02, 2018 11:32 AM > To: amd-gfx@lists.freedesktop.org > Cc: Deng, Emily> Subject: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr > for VF" > > This reverts commit 2ffe31deb27579e2f2c9444e01f4d8abf385d145. > The sdma wptr poll memomy doesn't have the same efficiency as doorbell, > and it will make sdma hang when running tests. > > Change-Id: I6e334430b309b0c21aa18a08764320c7ff51e353 > Signed-off-by: Emily Deng > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 - > drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 27 --- > 2 files changed, 8 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 102dad3..5dcf98b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -186,7 +186,6 @@ struct amdgpu_ring { > uint64_teop_gpu_addr; > u32 doorbell_index; > booluse_doorbell; > - booluse_pollmem; > unsignedwptr_offs; > unsignedfence_offs; > uint64_tcurrent_ctx; > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > index 521978c..d3fb3ca 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > @@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct > amdgpu_ring *ring) > struct amdgpu_device *adev = ring->adev; > u32 wptr; > > - if (ring->use_doorbell || ring->use_pollmem) { > + if (ring->use_doorbell) { > /* XXX check if swapping is necessary on BE */ > wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2; > } else { > @@ -380,13 +380,10 @@ static void sdma_v3_0_ring_set_wptr(struct > amdgpu_ring *ring) > > if (ring->use_doorbell) { > u32 *wb = (u32 
*)>wb.wb[ring->wptr_offs]; > + > /* XXX check if swapping is necessary on BE */ > WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); > WDOORBELL32(ring->doorbell_index, lower_32_bits(ring- > >wptr) << 2); > - } else if (ring->use_pollmem) { > - u32 *wb = (u32 *)>wb.wb[ring->wptr_offs]; > - > - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); > } else { > int me = (ring == >adev->sdma.instance[0].ring) ? 0 : 1; > > @@ -719,14 +716,10 @@ static int sdma_v3_0_gfx_resume(struct > amdgpu_device *adev) > WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + > sdma_offsets[i], > upper_32_bits(wptr_gpu_addr)); > wptr_poll_cntl = > RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); > - if (ring->use_pollmem) > - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > - > SDMA0_GFX_RB_WPTR_POLL_CNTL, > -ENABLE, 1); > + if (amdgpu_sriov_vf(adev)) > + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); > else > - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > - > SDMA0_GFX_RB_WPTR_POLL_CNTL, > -ENABLE, 0); > + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); > WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + > sdma_offsets[i], wptr_poll_cntl); > > /* enable DMA RB */ > @@ -1208,13 +1201,9 @@ static int sdma_v3_0_sw_init(void *handle) > for (i = 0; i < adev->sdma.num_instances; i++) { > ring = >sdma.instance[i].ring; > ring->ring_obj = NULL; > - if (!amdgpu_sriov_vf(adev)) { > - ring->use_doorbell = true; > - ring->doorbell_index = (i == 0) ? > - AMDGPU_DOORBELL_sDMA_ENGINE0 : > AMDGPU_DOORBELL_sDMA_ENGINE1; > - } else { > - ring->use_pollmem = true; > - } > + ring->use_doorbell = true; > + ring->doorbell_index = (i == 0) ? > + AMDGPU_DOORBELL_sDMA_ENGINE0 : > AMDGPU_DOORBELL_sDMA_ENGINE1; > > sprintf(ring->name, "sdma%d", i); > r = amdgpu_ring_init(adev, ring, 1024, > -- > 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM
On 2018-03-05 12:50 PM, Christian König wrote: >>> The easiest way to work around this to to add a separate IOCTL which >>> waits for VM updates to complete. Should be trivial to use >>> vm->last_update for this. >> For the cost of an extra system call. That means every GPU mapping >> operation now requires two system calls. With Spectre workarounds, that >> can be a lot more expensive. > > There is still very little overhead associated with that. > > And additional to this as long you actually need to wait it doesn't > matter if you wait directly in one system call or if you split it into > two. The time from the initial call to the end of the operation stays > the same. That's if you have waiting. For CPU page table updates, there is no waiting, so the extra system call is wasted. Another alternative would be to make the ioctl fail with -EINTR instead. That way the Thunk would be in charge of handling the restart, and we wouldn't have to pretend that no state was changed by the first call. > > You can then also pipeline multiple mappings and wait for all of them > to finish in the end. Our Thunk mapping function already bundles all the mappings it can do at once into a single ioctl. The Thunk API must guarantee that memory can be accessed when it returns. So we have no further room for improving the pipelining without changing the Thunk API semantics. Regards, Felix > > Regards, > Christian. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/atomic: Add new reverse iterator over all plane state (V2)
On 2018-03-01 02:56 AM, S, Shirish wrote: > From: Shirish S> > Add reverse iterator for_each_oldnew_plane_in_state_reverse to complement the > for_each_oldnew_plane_in_state way of reading plane states. > > The plane states are required to be read in reverse order for amd drivers, > cause the z order convention followed in linux is opposite to how the planes > are supposed to be presented to DC engine, which is in common to both windows > and linux. > > V2: fix compile time errors due to -Werror flag. > > Signed-off-by: Shirish S > Signed-off-by: Pratik Vishwakarma > --- > include/drm/drm_atomic.h | 22 ++ > 1 file changed, 22 insertions(+) > > diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index > cf13842..3fe8dde 100644 > --- a/include/drm/drm_atomic.h > +++ b/include/drm/drm_atomic.h > @@ -754,6 +754,28 @@ void drm_state_dump(struct drm_device *dev, struct > drm_printer *p); > (new_plane_state) = > (__state)->planes[__i].new_state, 1)) > > /** > + * for_each_oldnew_plane_in_state_reverse - iterate over all planes in > +an atomic > + * update in reverse order Looks good but can you check if docs build and render well with the newline in here? To build the docs run make DOCBOOKS="" htmldocs and then check that it renders correctly in Documentation/output/gpu/drm_kms.html Harry > + * @__state: drm_atomic_state pointer > + * @plane: drm_plane iteration cursor > + * @old_plane_state: drm_plane_state iteration cursor for the > +old state > + * @new_plane_state: drm_plane_state iteration cursor for the > +new state > + * @__i: int iteration cursor, for macro-internal use > + * > + * This iterates over all planes in an atomic update in reverse order, > + * tracking both old and new state. This is useful in places where the > + * state delta needs to be considered, for example in atomic check functions. 
> + */ > +#define for_each_oldnew_plane_in_state_reverse(__state, plane, > old_plane_state, new_plane_state, __i) \ > + for ((__i) = ((__state)->dev->mode_config.num_total_plane - 1); \ > + (__i) >= 0;\ > + (__i)--) \ > + for_each_if ((__state)->planes[__i].ptr && \ > + ((plane) = (__state)->planes[__i].ptr, \ > + (old_plane_state) = > (__state)->planes[__i].old_state,\ > + (new_plane_state) = > (__state)->planes[__i].new_state, 1)) > + > +/** > * for_each_old_plane_in_state - iterate over all planes in an atomic update > * @__state: drm_atomic_state pointer > * @plane: drm_plane iteration cursor > -- > 2.7.4 > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH xf86-video-ati] Only change Set/MoveCursor hooks from what we expect
On Mon, Mar 5, 2018 at 12:44 PM, Michel Dänzerwrote: > From: Michel Dänzer > > Since xf86CursorCloseScreen runs after RADEONCloseScreen_KMS, > PointPriv->spriteFuncs doesn't point to the same struct in the latter as > in RADEONCursorInit_KMS. So we were restoring info->Set/MoveCursor to > the wrong struct. Then in the next server generation, > info->Set/MoveCursor would end up pointing to > drmmode_sprite_set/move_cursor, resulting in an infinite loop if one of > them was called. > > To avoid this, only change the Set/MoveCursor hooks if their values > match our expectations, otherwise leave them as is. This is kind of a > hack, but the alternative would be invasive and thus risky changes to > the way we're wrapping CloseScreen, and it's not even clear that can > work without changing xserver code. > > Fixes: 1fe8ca75974c ("Keep track of how many SW cursors are visible on > each screen") > Signed-off-by: Michel Dänzer Acked-by: Alex Deucher > --- > src/radeon_kms.c | 16 ++-- > 1 file changed, 10 insertions(+), 6 deletions(-) > > diff --git a/src/radeon_kms.c b/src/radeon_kms.c > index 85390e306..790d4be16 100644 > --- a/src/radeon_kms.c > +++ b/src/radeon_kms.c > @@ -2017,10 +2017,12 @@ static Bool RADEONCursorInit_KMS(ScreenPtr pScreen) > return FALSE; > } > > - info->SetCursor = PointPriv->spriteFuncs->SetCursor; > - info->MoveCursor = PointPriv->spriteFuncs->MoveCursor; > - PointPriv->spriteFuncs->SetCursor = drmmode_sprite_set_cursor; > - PointPriv->spriteFuncs->MoveCursor = drmmode_sprite_move_cursor; > + if (PointPriv->spriteFuncs->SetCursor != drmmode_sprite_set_cursor) { > + info->SetCursor = PointPriv->spriteFuncs->SetCursor; > + info->MoveCursor = PointPriv->spriteFuncs->MoveCursor; > + PointPriv->spriteFuncs->SetCursor = drmmode_sprite_set_cursor; > + PointPriv->spriteFuncs->MoveCursor = drmmode_sprite_move_cursor; > + } > } > > if (xf86ReturnOptValBool(info->Options, OPTION_SW_CURSOR, FALSE)) > @@ -2184,8 +2186,10 @@ static Bool 
RADEONCloseScreen_KMS(ScreenPtr pScreen) > miPointerScreenPtr PointPriv = > dixLookupPrivate(>devPrivates, miPointerScreenKey); > > - PointPriv->spriteFuncs->SetCursor = info->SetCursor; > - PointPriv->spriteFuncs->MoveCursor = info->MoveCursor; > + if (PointPriv->spriteFuncs->SetCursor == drmmode_sprite_set_cursor) { > + PointPriv->spriteFuncs->SetCursor = info->SetCursor; > + PointPriv->spriteFuncs->MoveCursor = info->MoveCursor; > + } > } > > pScreen->BlockHandler = info->BlockHandler; > -- > 2.16.2 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 4/5] drm/ttm: add ttm_sg_tt_init
On Mon, Mar 5, 2018 at 10:06 PM, Christian Königwrote: > Ping? On a quick look, it looks like both are used sometimes. I believe this had something to do with the addition of Tegra support, but I'll need some time to look into the details of why/how these things are used again. Ben. > > > Am 27.02.2018 um 13:07 schrieb Christian König: >> >> Hi guys, >> >> at least on amdgpu and radeon the page array allocated by ttm_dma_tt_init >> is completely unused in the case of DMA-buf sharing. So I'm trying to get >> rid of that by only allocating the DMA address array. >> >> Now the only other user of DMA-buf together with ttm_dma_tt_init is >> Nouveau. So my question is are you guys using the page array anywhere in >> your kernel driver in case of a DMA-buf sharing? >> >> If no then I could just make this the default behavior for all drivers and >> save quite a bit of memory for everybody. >> >> Thanks, >> Christian. >> >> Am 27.02.2018 um 12:49 schrieb Christian König: >>> >>> This allows drivers to only allocate dma addresses, but not a page >>> array. 
>>> >>> Signed-off-by: Christian König >>> --- >>> drivers/gpu/drm/ttm/ttm_tt.c | 54 >>> >>> include/drm/ttm/ttm_tt.h | 2 ++ >>> 2 files changed, 47 insertions(+), 9 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c >>> index 8e0b525cda00..971133106ec2 100644 >>> --- a/drivers/gpu/drm/ttm/ttm_tt.c >>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c >>> @@ -108,6 +108,16 @@ static int ttm_dma_tt_alloc_page_directory(struct >>> ttm_dma_tt *ttm) >>> return 0; >>> } >>> +static int ttm_sg_tt_alloc_page_directory(struct ttm_dma_tt *ttm) >>> +{ >>> +ttm->dma_address = kvmalloc_array(ttm->ttm.num_pages, >>> + sizeof(*ttm->dma_address), >>> + GFP_KERNEL | __GFP_ZERO); >>> +if (!ttm->dma_address) >>> +return -ENOMEM; >>> +return 0; >>> +} >>> + >>> #ifdef CONFIG_X86 >>> static inline int ttm_tt_set_page_caching(struct page *p, >>> enum ttm_caching_state c_old, >>> @@ -227,8 +237,8 @@ void ttm_tt_destroy(struct ttm_tt *ttm) >>> ttm->func->destroy(ttm); >>> } >>> -int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev, >>> -unsigned long size, uint32_t page_flags) >>> +void ttm_tt_init_fields(struct ttm_tt *ttm, struct ttm_bo_device *bdev, >>> +unsigned long size, uint32_t page_flags) >>> { >>> ttm->bdev = bdev; >>> ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; >>> @@ -236,6 +246,12 @@ int ttm_tt_init(struct ttm_tt *ttm, struct >>> ttm_bo_device *bdev, >>> ttm->page_flags = page_flags; >>> ttm->state = tt_unpopulated; >>> ttm->swap_storage = NULL; >>> +} >>> + >>> +int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev, >>> +unsigned long size, uint32_t page_flags) >>> +{ >>> +ttm_tt_init_fields(ttm, bdev, size, page_flags); >>> if (ttm_tt_alloc_page_directory(ttm)) { >>> ttm_tt_destroy(ttm); >>> @@ -258,12 +274,7 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, >>> struct ttm_bo_device *bdev, >>> { >>> struct ttm_tt *ttm = _dma->ttm; >>> -ttm->bdev = bdev; >>> -ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 
>>> -ttm->caching_state = tt_cached; >>> -ttm->page_flags = page_flags; >>> -ttm->state = tt_unpopulated; >>> -ttm->swap_storage = NULL; >>> +ttm_tt_init_fields(ttm, bdev, size, page_flags); >>> INIT_LIST_HEAD(_dma->pages_list); >>> if (ttm_dma_tt_alloc_page_directory(ttm_dma)) { >>> @@ -275,11 +286,36 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, >>> struct ttm_bo_device *bdev, >>> } >>> EXPORT_SYMBOL(ttm_dma_tt_init); >>> +int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device >>> *bdev, >>> + unsigned long size, uint32_t page_flags) >>> +{ >>> +struct ttm_tt *ttm = _dma->ttm; >>> +int ret; >>> + >>> +ttm_tt_init_fields(ttm, bdev, size, page_flags); >>> + >>> +INIT_LIST_HEAD(_dma->pages_list); >>> +if (page_flags & TTM_PAGE_FLAG_SG) >>> +ret = ttm_sg_tt_alloc_page_directory(ttm_dma); >>> +else >>> +ret = ttm_dma_tt_alloc_page_directory(ttm_dma); >>> +if (ret) { >>> +ttm_tt_destroy(ttm); >>> +pr_err("Failed allocating page table\n"); >>> +return -ENOMEM; >>> +} >>> +return 0; >>> +} >>> +EXPORT_SYMBOL(ttm_sg_tt_init); >>> + >>> void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma) >>> { >>> struct ttm_tt *ttm = _dma->ttm; >>> -kvfree(ttm->pages); >>> +if (ttm->pages) >>> +kvfree(ttm->pages); >>> +else >>> +kvfree(ttm_dma->dma_address); >>> ttm->pages = NULL; >>> ttm_dma->dma_address = NULL; >>> } >>> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h >>> index
Modesetting (amdgpudrmfb) artifacts on RAVEN APU
Hi list, I'd like to report a very odd screen artifacts while running both 4.16-rc3, as well as latest 4.16-rc4 with git linux-firmware. I am using Ryzen 2400G with the integrate Vega. I am aware that RAVEN support is yet to be finished, however I've read that some people do run it already, so I figured I will report the issues, since other does not seems to hit it. I have amdgpu and all it's symbols built into the kernel image, and the firmware added to initrammfs. The moment modesetting is initializing I can see that native screen resolution goes, however, I can see only like 25% of the screen and this very top-left 25% of screen is duplicated to top-right. While the bottom half of screen is either black or have lines, usually gray, unless some text on screen had another color then it's green, blue, etc. Screenshots: https://i.imgur.com/qnDOKY7.jpg https://i.imgur.com/XH42zit.jpg The AMD symbols that I've enabled in kernel: CONFIG_CPU_SUP_AMD=y CONFIG_X86_MCE_AMD=y CONFIG_AMD_NB=y CONFIG_NET_VENDOR_AMD=y CONFIG_DRM_AMDGPU=y CONFIG_DRM_AMDGPU_SI=y CONFIG_DRM_AMDGPU_CIK=y CONFIG_DRM_AMD_ACP=y CONFIG_DRM_AMD_DC=y CONFIG_DRM_AMD_DC_FBC=y CONFIG_DRM_AMD_DC_DCN1_0=y CONFIG_HSA_AMD=y CONFIG_AMD_IOMMU=y CONFIG_AMD_IOMMU_V2=y The kernel log that had either drm, amd or firmware in there: [0.00] RAMDISK: [mem 0x7f88c000-0x7fff] [0.00] ACPI: SSDT 0x9BD94908 005367 (v02 AMD AmdTable 0002 MSFT 0202) [0.00] ACPI: SSDT 0x9BD99C70 00119C (v01 AMDAMD CPU 0001 AMD 0001) [0.00] ACPI: CRAT 0x9BD9AE10 000810 (v01 AMDAMD CRAT 0001 AMD 0001) [0.00] ACPI: CDIT 0x9BD9B620 29 (v01 AMDAMD CDIT 0001 AMD 0001) [0.00] ACPI: SSDT 0x9BD9B650 002E6E (v01 AMDAMD AOD 0001 INTL 20120913) [0.00] ACPI: IVRS 0x9BD9E580 D0 (v02 AMDAMD IVRS 0001 AMD ) [0.00] ACPI: SSDT 0x9BD9E650 F8 (v01 AMDAMD PT 1000 INTL 20120913) [0.00] ACPI: SSDT 0x9BD9E748 000E96 (v01 AMD AmdTable 0001 INTL 20120913) [0.00] ACPI: SSDT 0x9BD9F5E0 000850 (v01 AMD AmdTable 0001 INTL 20120913) [0.00] ACPI: SSDT 0x9BD9FE30 001993 (v01 
AMD AmdTable 0001 INTL 20120913) [0.00] Kernel command line: BOOT_IMAGE=/bzImage-4.16.0-rc4 rootfstype=ext4 luks enc_root=/dev/sda2 lvm root=/dev/mapper/megumin-rootfs initrd=/initramfs.cpio.gz,/firmware-initramfs.cpio.gz [0.00] ACPI Error: AE_ALREADY_EXISTS, (SSDT: AMD PT) while loading table (20180105/tbxfload-228) [0.08] smpboot: CPU0: AMD Ryzen 5 2400G with Radeon Vega Graphics (family: 0x17, model: 0x11, stepping: 0x0) [0.08] Performance Events: Fam17h core perfctr, AMD PMU driver. [0.101786] ACPI: [Firmware Bug]: BIOS _OSI(Linux) query ignored [0.615782] AMD-Vi: IOMMU performance counters supported [0.623179] AMD-Vi: Found IOMMU at :00:00.2 cap 0x40 [0.623314] AMD-Vi: Extended features (0x4f77ef22294ada): [0.623684] AMD-Vi: Lazy IO/TLB flushing enabled [0.624533] amd_uncore: AMD NB counters detected [0.624666] amd_uncore: AMD LLC counters detected [0.625076] perf/amd_iommu: Detected AMD IOMMU #0 (2 banks, 4 counters/bank). [0.636229] AMD IOMMUv2 driver by Joerg Roedel[0.637179] [drm] amdgpu kernel modesetting enabled. [0.637409] [drm] initializing kernel modesetting (RAVEN 0x1002:0x15DD 0x1458:0xD000 0xC6). [0.637583] [drm] register mmio base: 0xFE50 [0.637709] [drm] register mmio size: 524288 [0.637852] [drm] probing gen 2 caps for device 1022:15db = 700d03/e [0.638005] [drm] probing mlw for device 1022:15db = 700d03 [0.638213] [drm] VCN decode is enabled in VM mode [0.638341] [drm] VCN encode is enabled in VM mode [0.660265] [drm] BIOS signature incorrect 74 7 [0.660422] [drm] vm size is 262144 GB, 4 levels, block size is 9-bit, fragment size is 9-bit [0.660515] amdgpu :09:00.0: VRAM: 1024M 0x00F4 - 0x00F43FFF (1024M used) [0.660607] amdgpu :09:00.0: GTT: 1024M 0x00F5 - 0x00F53FFF [0.660689] [drm] Detected VRAM RAM=1024M, BAR=256M [0.660756] [drm] RAM width 128bits UNKNOWN [0.661653] [drm] amdgpu: 1024M of VRAM memory ready [0.661720] [drm] amdgpu: 3072M of GTT memory ready. 
[0.661793] [drm] GART: num cpu pages 262144, num gpu pages 262144 [0.662027] [drm] PCIE GART of 1024M enabled (table at 0x00F40080). [0.663093] [drm] use_doorbell being set to: [true] [0.663229] [drm] Found VCN firmware Version: 1.73 Family ID: 18 [1.011745]
Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM
On Mon, Mar 5, 2018 at 12:14 PM, Christian Königwrote: > Am 05.03.2018 um 17:58 schrieb Felix Kuehling: >> >> On 2018-03-05 07:17 AM, Christian König wrote: >>> >>> Am 04.03.2018 um 03:34 schrieb Felix Kuehling: Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + 2 files changed, 83 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5afbc5e..58153ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2350,6 +2350,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, INIT_KFIFO(vm->faults); vm->fault_credit = 16; +vm->vm_context = vm_context; >>> >>> I think we should drop the vm_context parameter and all the related >>> code in amdgpu_vm_init(). But that can be part of a later cleanup >>> patch as well. >> >> Yep. It will be a prerequisite for sharing VMs between graphics and >> compute. But then CPU vs SDMA page table update would need to be handled >> differently, on a per-call basis and probably some synchronization >> between them. >> >> Also, we'd need to fix shadow page table handling with CPU updates, or >> get rid of shadow page tables. I'm not sure why they're needed, TBH. > > > The idea behind shadow page tables is that you have the current state of the > pipeline if the GPU crashes. Since with CPU updates there is no pipeline > they doesn't make sense at all with them. They are also to protect against vram loss in the case of a GPU reset. Alex > > >> + return 0; error_free_root: @@ -2364,6 +2366,86 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } /** + * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM + * + * This only works on GFX VMs that don't have any BOs added and no + * page tables allocated yet. 
+ * + * Changes the following VM parameters: + * - vm_context + * - use_cpu_for_update + * - pte_supports_ats + * - pasid (old PASID is released, because compute manages its own PASIDs) + * + * Reinitializes the page directory to reflect the changed ATS + * setting. May leave behind an unused shadow BO for the page + * directory when switching from SDMA updates to CPU updates. + * + * Returns 0 for success, -errno for errors. + */ +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ +bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); +int r; + +r = amdgpu_bo_reserve(vm->root.base.bo, true); +if (r) +return r; + +/* Sanity checks */ +if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { +/* Can happen if ioctl is interrupted by a signal after + * this function already completed. Just return success. + */ +r = 0; +goto error; +} >>> >>> Ok, that is actually a show stopper. An interrupted IOCTL should never >>> have a visible effect. >>> >>> Is that just a theoretical issue or did you run into this in practice? >> >> It's theoretical. init_kfd_vm in patch 4 reserves the page directory >> again, validates it and waits for the validation to complete. Both the >> reservation and the waiting can be interrupted by signals. > > > No, both have parameters to control if they are interruptible or not for > exactly this reason. > >> This happens >> after amdgpu_vm_make_compute has completed. >> >> If we wanted to achieve "no visible effect", we'd need to roll back the >> conversion to a compute VM, which may involve more waiting. >> >> We have a similar issue in map_memory_to_gpu. We map on all GPUs and >> wait for a sync object in the end. If SDMA is used, this allows the SDMA >> on all GPUs to work concurrently. The wait in the end can be >> interrupted. Instead of rolling back the mapping on all GPUs (which >> would require more waiting), we just let the system call return >> -ERESTARTSYS and make sure the restart doesn't cause any trouble. 
> > > If waits are interruptible or not is controllable by parameters. At least > for importing the VM it is probably ok to disable this. > >> I've been looking for documentation about the expected behaviour for >> system calls interrupted by signals. All I can find is this statement in >> Documentation/kernel-hacking/hacking.rst: >>> >>> After you slept you should check if a signal occurred: the Unix/Linux >>> way of handling signals is to temporarily exit the system call with the >>> ``-ERESTARTSYS`` error. The system call entry code will switch back to >>> user context, process the signal
Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM
On 2018-03-05 12:14 PM, Christian König wrote: >> Also, we'd need to fix shadow page table handling with CPU updates, or >> get rid of shadow page tables. I'm not sure why they're needed, TBH. > > The idea behind shadow page tables is that you have the current state > of the pipeline if the GPU crashes. I see. So a GPU can be reset in the middle of a page table update without losing anything and without having to recreate the page table from scratch. > Since with CPU updates there is no pipeline they doesn't make sense at > all with them. That's true when all page table updates are done by the CPU. If we start mixing CPU and SDMA updates in the same VM, then CPU updates need to update the shadow page table as well. Regards, Felix ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/radeon: fix KV harvesting
Am 05.03.2018 um 19:30 schrieb Alex Deucher: Always set the graphics values to the max for the asic type. E.g., some 1 RB chips are actually 1 RB chips, others are actually harvested 2 RB chips. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=99353 Signed-off-by: Alex DeucherCc: sta...@vger.kernel.org Reviewed-by: Christian König for the series. --- drivers/gpu/drm/radeon/cik.c | 31 ++- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index d3045a371a55..7c73bc7e2f85 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3221,35 +3221,8 @@ static void cik_gpu_init(struct radeon_device *rdev) case CHIP_KAVERI: rdev->config.cik.max_shader_engines = 1; rdev->config.cik.max_tile_pipes = 4; - if ((rdev->pdev->device == 0x1304) || - (rdev->pdev->device == 0x1305) || - (rdev->pdev->device == 0x130C) || - (rdev->pdev->device == 0x130F) || - (rdev->pdev->device == 0x1310) || - (rdev->pdev->device == 0x1311) || - (rdev->pdev->device == 0x131C)) { - rdev->config.cik.max_cu_per_sh = 8; - rdev->config.cik.max_backends_per_se = 2; - } else if ((rdev->pdev->device == 0x1309) || - (rdev->pdev->device == 0x130A) || - (rdev->pdev->device == 0x130D) || - (rdev->pdev->device == 0x1313) || - (rdev->pdev->device == 0x131D)) { - rdev->config.cik.max_cu_per_sh = 6; - rdev->config.cik.max_backends_per_se = 2; - } else if ((rdev->pdev->device == 0x1306) || - (rdev->pdev->device == 0x1307) || - (rdev->pdev->device == 0x130B) || - (rdev->pdev->device == 0x130E) || - (rdev->pdev->device == 0x1315) || - (rdev->pdev->device == 0x1318) || - (rdev->pdev->device == 0x131B)) { - rdev->config.cik.max_cu_per_sh = 4; - rdev->config.cik.max_backends_per_se = 1; - } else { - rdev->config.cik.max_cu_per_sh = 3; - rdev->config.cik.max_backends_per_se = 1; - } + rdev->config.cik.max_cu_per_sh = 8; + rdev->config.cik.max_backends_per_se = 2; rdev->config.cik.max_sh_per_se = 1; 
rdev->config.cik.max_texture_channel_caches = 4; rdev->config.cik.max_gprs = 256; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdgpu: fix KV harvesting
Always set the graphics values to the max for the asic type. E.g., some 1 RB chips are actually 1 RB chips, others are actually harvested 2 RB chips. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=99353 Signed-off-by: Alex DeucherCc: sta...@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 30 ++ 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 972d421caada..e13d9d83767b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4358,34 +4358,8 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) case CHIP_KAVERI: adev->gfx.config.max_shader_engines = 1; adev->gfx.config.max_tile_pipes = 4; - if ((adev->pdev->device == 0x1304) || - (adev->pdev->device == 0x1305) || - (adev->pdev->device == 0x130C) || - (adev->pdev->device == 0x130F) || - (adev->pdev->device == 0x1310) || - (adev->pdev->device == 0x1311) || - (adev->pdev->device == 0x131C)) { - adev->gfx.config.max_cu_per_sh = 8; - adev->gfx.config.max_backends_per_se = 2; - } else if ((adev->pdev->device == 0x1309) || - (adev->pdev->device == 0x130A) || - (adev->pdev->device == 0x130D) || - (adev->pdev->device == 0x1313) || - (adev->pdev->device == 0x131D)) { - adev->gfx.config.max_cu_per_sh = 6; - adev->gfx.config.max_backends_per_se = 2; - } else if ((adev->pdev->device == 0x1306) || - (adev->pdev->device == 0x1307) || - (adev->pdev->device == 0x130B) || - (adev->pdev->device == 0x130E) || - (adev->pdev->device == 0x1315) || - (adev->pdev->device == 0x131B)) { - adev->gfx.config.max_cu_per_sh = 4; - adev->gfx.config.max_backends_per_se = 1; - } else { - adev->gfx.config.max_cu_per_sh = 3; - adev->gfx.config.max_backends_per_se = 1; - } + adev->gfx.config.max_cu_per_sh = 8; + adev->gfx.config.max_backends_per_se = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_texture_channel_caches = 4; adev->gfx.config.max_gprs = 256; -- 2.13.6 ___ 
amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/radeon: fix KV harvesting
Always set the graphics values to the max for the asic type. E.g., some 1 RB chips are actually 1 RB chips, others are actually harvested 2 RB chips. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=99353 Signed-off-by: Alex DeucherCc: sta...@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 31 ++- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index d3045a371a55..7c73bc7e2f85 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3221,35 +3221,8 @@ static void cik_gpu_init(struct radeon_device *rdev) case CHIP_KAVERI: rdev->config.cik.max_shader_engines = 1; rdev->config.cik.max_tile_pipes = 4; - if ((rdev->pdev->device == 0x1304) || - (rdev->pdev->device == 0x1305) || - (rdev->pdev->device == 0x130C) || - (rdev->pdev->device == 0x130F) || - (rdev->pdev->device == 0x1310) || - (rdev->pdev->device == 0x1311) || - (rdev->pdev->device == 0x131C)) { - rdev->config.cik.max_cu_per_sh = 8; - rdev->config.cik.max_backends_per_se = 2; - } else if ((rdev->pdev->device == 0x1309) || - (rdev->pdev->device == 0x130A) || - (rdev->pdev->device == 0x130D) || - (rdev->pdev->device == 0x1313) || - (rdev->pdev->device == 0x131D)) { - rdev->config.cik.max_cu_per_sh = 6; - rdev->config.cik.max_backends_per_se = 2; - } else if ((rdev->pdev->device == 0x1306) || - (rdev->pdev->device == 0x1307) || - (rdev->pdev->device == 0x130B) || - (rdev->pdev->device == 0x130E) || - (rdev->pdev->device == 0x1315) || - (rdev->pdev->device == 0x1318) || - (rdev->pdev->device == 0x131B)) { - rdev->config.cik.max_cu_per_sh = 4; - rdev->config.cik.max_backends_per_se = 1; - } else { - rdev->config.cik.max_cu_per_sh = 3; - rdev->config.cik.max_backends_per_se = 1; - } + rdev->config.cik.max_cu_per_sh = 8; + rdev->config.cik.max_backends_per_se = 2; rdev->config.cik.max_sh_per_se = 1; rdev->config.cik.max_texture_channel_caches = 4; rdev->config.cik.max_gprs = 256; -- 2.13.6 ___ amd-gfx mailing list 
amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/4] drm/amdgpu: remove useless variable
On Sun, Mar 4, 2018 at 11:09 PM, Monk Liu wrote: > Change-Id: Iec96c7762d791164c719ec64ec410e00a8cf5d90 > Signed-off-by: Monk Liu Already fixed in amd-staging-drm-next. Alex > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > index 43ce2e5..42c1401 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > @@ -141,7 +141,7 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, > uint32_t reg) > { > signed long r, cnt = 0; > unsigned long flags; > - uint32_t val, seq; > + uint32_t seq; > struct amdgpu_kiq *kiq = &adev->gfx.kiq; > struct amdgpu_ring *ring = &kiq->ring; > > -- > 2.7.4 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 3/3] drm/amd/pp: Implement update_dpm_settings on CI
On Mon, Mar 5, 2018 at 2:55 AM, Rex Zhuwrote: > use SW method to update DPM settings by updating SRAM > directly on CI. > > Change-Id: Ie9ed6c3a0e1c327cc9a9b06bec47b1cede87278d > Signed-off-by: Rex Zhu Series is: Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 97 > > 1 file changed, 97 insertions(+) > > diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c > b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c > index 76f700f..179d00c 100644 > --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c > +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c > @@ -2819,6 +2819,102 @@ static int ci_start_smu(struct pp_hwmgr *hwmgr) > return 0; > } > > +static int ci_update_dpm_settings(struct pp_hwmgr *hwmgr, > + void *profile_setting) > +{ > + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); > + struct ci_smumgr *smu_data = (struct ci_smumgr *) > + (hwmgr->smu_backend); > + struct profile_mode_setting *setting; > + struct SMU7_Discrete_GraphicsLevel *levels = > + smu_data->smc_state_table.GraphicsLevel; > + uint32_t array = smu_data->dpm_table_start + > + offsetof(SMU7_Discrete_DpmTable, GraphicsLevel); > + > + uint32_t mclk_array = smu_data->dpm_table_start + > + offsetof(SMU7_Discrete_DpmTable, MemoryLevel); > + struct SMU7_Discrete_MemoryLevel *mclk_levels = > + smu_data->smc_state_table.MemoryLevel; > + uint32_t i; > + uint32_t offset, up_hyst_offset, down_hyst_offset, > clk_activity_offset, tmp; > + > + if (profile_setting == NULL) > + return -EINVAL; > + > + setting = (struct profile_mode_setting *)profile_setting; > + > + if (setting->bupdate_sclk) { > + if (!data->sclk_dpm_key_disabled) > + smum_send_msg_to_smc(hwmgr, > PPSMC_MSG_SCLKDPM_FreezeLevel); > + for (i = 0; i < > smu_data->smc_state_table.GraphicsDpmLevelCount; i++) { > + if (levels[i].ActivityLevel != > + cpu_to_be16(setting->sclk_activity)) { > + levels[i].ActivityLevel = > cpu_to_be16(setting->sclk_activity); > + > + clk_activity_offset = 
array + > (sizeof(SMU7_Discrete_GraphicsLevel) * i) > + + > offsetof(SMU7_Discrete_GraphicsLevel, ActivityLevel); > + offset = clk_activity_offset & ~0x3; > + tmp = > PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, > offset)); > + tmp = > phm_set_field_to_u32(clk_activity_offset, tmp, levels[i].ActivityLevel, > sizeof(uint16_t)); > + cgs_write_ind_register(hwmgr->device, > CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp)); > + > + } > + if (levels[i].UpH != setting->sclk_up_hyst || > + levels[i].DownH != setting->sclk_down_hyst) { > + levels[i].UpH = setting->sclk_up_hyst; > + levels[i].DownH = setting->sclk_down_hyst; > + up_hyst_offset = array + > (sizeof(SMU7_Discrete_GraphicsLevel) * i) > + + > offsetof(SMU7_Discrete_GraphicsLevel, UpH); > + down_hyst_offset = array + > (sizeof(SMU7_Discrete_GraphicsLevel) * i) > + + > offsetof(SMU7_Discrete_GraphicsLevel, DownH); > + offset = up_hyst_offset & ~0x3; > + tmp = > PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, > offset)); > + tmp = phm_set_field_to_u32(up_hyst_offset, > tmp, levels[i].UpH, sizeof(uint8_t)); > + tmp = phm_set_field_to_u32(down_hyst_offset, > tmp, levels[i].DownH, sizeof(uint8_t)); > + cgs_write_ind_register(hwmgr->device, > CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp)); > + } > + } > + if (!data->sclk_dpm_key_disabled) > + smum_send_msg_to_smc(hwmgr, > PPSMC_MSG_SCLKDPM_UnfreezeLevel); > + } > + > + if (setting->bupdate_mclk) { > + if (!data->mclk_dpm_key_disabled) > + smum_send_msg_to_smc(hwmgr, > PPSMC_MSG_MCLKDPM_FreezeLevel); > + for (i = 0; i < > smu_data->smc_state_table.MemoryDpmLevelCount; i++) { > + if (mclk_levels[i].ActivityLevel != > +
Re: [PATCH 3/3] drm/amdgpu: further mitigate workaround for i915
On Mon, Mar 5, 2018 at 7:00 AM, Christian Königwrote: > Disable the workaround on imported BOs as well. > > Signed-off-by: Christian König Series is: Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 12 +--- > 1 file changed, 9 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > index 8ce74a1d9966..fb66b45548d3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > @@ -107,12 +107,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, > ww_mutex_lock(>lock, NULL); > ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false, >AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, ); > - ww_mutex_unlock(>lock); > if (ret) > - return ERR_PTR(ret); > + goto error; > + > + if (attach->dmabuf->ops != _dmabuf_ops) > + bo->prime_shared_count = 1; > > - bo->prime_shared_count = 1; > + ww_mutex_unlock(>lock); > return >gem_base; > + > +error: > + ww_mutex_unlock(>lock); > + return ERR_PTR(ret); > } > > static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, > -- > 2.14.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/3] drm/amd/pp: Delete the wrap layer of smu_allocate/free_memory
On Mon, Mar 5, 2018 at 5:43 AM, Rex Zhuwrote: > use amdgpu_bo_create/free_kernel directly. > > Change-Id: I74f20353edd4e0df6328db66914cd9eabb60e1d7 > Signed-off-by: Rex Zhu > --- > drivers/gpu/drm/amd/powerplay/inc/smumgr.h | 7 - > drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c | 39 +++--- > drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.h | 11 +- > drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c | 53 +++ > drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h | 11 +- > drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c | 52 +++ > drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h | 11 +- > drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c | 51 --- > .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 155 > ++--- > .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h | 11 +- > 10 files changed, 176 insertions(+), 225 deletions(-) > > diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h > b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h > index e1f6e83..8872c5c 100644 > --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h > +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h > @@ -106,13 +106,6 @@ enum SMU_MAC_DEFINITION { > extern int smum_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, > uint16_t msg, uint32_t parameter); > > -extern int smu_allocate_memory(void *device, uint32_t size, > -enum cgs_gpu_mem_type type, > -uint32_t byte_align, uint64_t *mc_addr, > -void **kptr, void *handle); > - > -extern int smu_free_memory(void *device, void *handle); > - > extern int smum_update_sclk_threshold(struct pp_hwmgr *hwmgr); > > extern int smum_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type); > diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c > b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c > index 7fe4c11..0d2892d 100644 > --- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c > +++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c > @@ -750,7 +750,6 @@ static int cz_start_smu(struct pp_hwmgr *hwmgr) > > static int cz_smu_init(struct pp_hwmgr *hwmgr) > { > 
- uint64_t mc_addr = 0; > int ret = 0; > struct cz_smumgr *cz_smu; > > @@ -768,31 +767,31 @@ static int cz_smu_init(struct pp_hwmgr *hwmgr) > ALIGN(sizeof(struct SMU8_MultimediaPowerLogData), 32) + > ALIGN(sizeof(struct SMU8_Fusion_ClkTable), 32); > > - ret = smu_allocate_memory(hwmgr->device, > + ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, > cz_smu->toc_buffer.data_size, > - CGS_GPU_MEM_TYPE__GART_CACHEABLE, > PAGE_SIZE, > - _addr, > - _smu->toc_buffer.kaddr, > - _smu->toc_buffer.handle); > + AMDGPU_GEM_DOMAIN_VRAM, > + _smu->toc_buffer.handle, > + _smu->toc_buffer.mc_addr, > + _smu->toc_buffer.kaddr); > if (ret != 0) > return -1; > > - cz_smu->toc_buffer.mc_addr_high = smu_upper_32_bits(mc_addr); > - cz_smu->toc_buffer.mc_addr_low = smu_lower_32_bits(mc_addr); > + cz_smu->toc_buffer.mc_addr_high = > smu_upper_32_bits(cz_smu->toc_buffer.mc_addr); > + cz_smu->toc_buffer.mc_addr_low = > smu_lower_32_bits(cz_smu->toc_buffer.mc_addr); Another potential future patch would be to replace smu_upper_32_bits/smu_lower_32_bits with the standard kernel macros. Series is: Reviewed-by: Alex Deucher Alex > > - ret = smu_allocate_memory(hwmgr->device, > + ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, > cz_smu->smu_buffer.data_size, > - CGS_GPU_MEM_TYPE__GART_CACHEABLE, > PAGE_SIZE, > - _addr, > - _smu->smu_buffer.kaddr, > - _smu->smu_buffer.handle); > + AMDGPU_GEM_DOMAIN_VRAM, > + _smu->smu_buffer.handle, > + _smu->toc_buffer.mc_addr, > + _smu->smu_buffer.kaddr); > if (ret != 0) > return -1; > > - cz_smu->smu_buffer.mc_addr_high = smu_upper_32_bits(mc_addr); > - cz_smu->smu_buffer.mc_addr_low = smu_lower_32_bits(mc_addr); > + cz_smu->smu_buffer.mc_addr_high = > smu_upper_32_bits(cz_smu->toc_buffer.mc_addr); > + cz_smu->smu_buffer.mc_addr_low = > smu_lower_32_bits(cz_smu->toc_buffer.mc_addr); > > if (0 != cz_smu_populate_single_scratch_entry(hwmgr, >
Re: [PATCH] Revert "drm/amd/pp: Add a pp feature mask bit for AutoWattman feature"
On Mon, Mar 5, 2018 at 1:46 AM, Rex Zhu wrote: > This reverts commit e429ea87b2939c4cce1b439baf6d76535a0767f2. > > Implement Workload Aware Dynamic power management instead of > AutoWattman feature in linux. > > Change-Id: I619ba5695b14e1d165829c98d2776e5173f2737e > Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h > b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h > index 94df6ee..b151ad85 100644 > --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h > +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h > @@ -85,7 +85,6 @@ enum PP_FEATURE_MASK { > PP_SOCCLK_DPM_MASK = 0x1000, > PP_DCEFCLK_DPM_MASK = 0x2000, > PP_OVERDRIVE_MASK = 0x4000, > - PP_AUTOWATTMAN_MASK = 0x8000, > }; > > enum PHM_BackEnd_Magic { > -- > 1.9.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/pp: Fix incorrect return value in smu7_check_clk_voltage_valid
On Fri, Mar 2, 2018 at 5:49 AM, Rex Zhu wrote: > Change-Id: I248982c5e0100b7a975d1a19781749baeadd71f4 > Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c > b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c > index dbfe139..10fb3ec 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c > @@ -4692,7 +4692,7 @@ static bool smu7_check_clk_voltage_valid(struct > pp_hwmgr *hwmgr, > struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table; > > if (table_info == NULL) > - return -EINVAL; > + return false; > > dep_sclk_table = table_info->vdd_dep_on_sclk; > min_vddc = dep_sclk_table->entries[0].vddc; > -- > 1.9.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH xf86-video-ati] Only change Set/MoveCursor hooks from what we expect
From: Michel DänzerSince xf86CursorCloseScreen runs after RADEONCloseScreen_KMS, PointPriv->spriteFuncs doesn't point to the same struct in the latter as in RADEONCursorInit_KMS. So we were restoring info->Set/MoveCursor to the wrong struct. Then in the next server generation, info->Set/MoveCursor would end up pointing to drmmode_sprite_set/move_cursor, resulting in an infinite loop if one of them was called. To avoid this, only change the Set/MoveCursor hooks if their values match our expectations, otherwise leave them as is. This is kind of a hack, but the alternative would be invasive and thus risky changes to the way we're wrapping CloseScreen, and it's not even clear that can work without changing xserver code. Fixes: 1fe8ca75974c ("Keep track of how many SW cursors are visible on each screen") Signed-off-by: Michel Dänzer --- src/radeon_kms.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/radeon_kms.c b/src/radeon_kms.c index 85390e306..790d4be16 100644 --- a/src/radeon_kms.c +++ b/src/radeon_kms.c @@ -2017,10 +2017,12 @@ static Bool RADEONCursorInit_KMS(ScreenPtr pScreen) return FALSE; } - info->SetCursor = PointPriv->spriteFuncs->SetCursor; - info->MoveCursor = PointPriv->spriteFuncs->MoveCursor; - PointPriv->spriteFuncs->SetCursor = drmmode_sprite_set_cursor; - PointPriv->spriteFuncs->MoveCursor = drmmode_sprite_move_cursor; + if (PointPriv->spriteFuncs->SetCursor != drmmode_sprite_set_cursor) { + info->SetCursor = PointPriv->spriteFuncs->SetCursor; + info->MoveCursor = PointPriv->spriteFuncs->MoveCursor; + PointPriv->spriteFuncs->SetCursor = drmmode_sprite_set_cursor; + PointPriv->spriteFuncs->MoveCursor = drmmode_sprite_move_cursor; + } } if (xf86ReturnOptValBool(info->Options, OPTION_SW_CURSOR, FALSE)) @@ -2184,8 +2186,10 @@ static Bool RADEONCloseScreen_KMS(ScreenPtr pScreen) miPointerScreenPtr PointPriv = dixLookupPrivate(>devPrivates, miPointerScreenKey); - PointPriv->spriteFuncs->SetCursor = info->SetCursor; - 
PointPriv->spriteFuncs->MoveCursor = info->MoveCursor; + if (PointPriv->spriteFuncs->SetCursor == drmmode_sprite_set_cursor) { + PointPriv->spriteFuncs->SetCursor = info->SetCursor; + PointPriv->spriteFuncs->MoveCursor = info->MoveCursor; + } } pScreen->BlockHandler = info->BlockHandler; -- 2.16.2 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM
On 2018-03-05 12:14 PM, Christian König wrote: > Am 05.03.2018 um 17:58 schrieb Felix Kuehling: >> On 2018-03-05 07:17 AM, Christian König wrote: >>> Am 04.03.2018 um 03:34 schrieb Felix Kuehling: Signed-off-by: Felix Kuehling--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + 2 files changed, 83 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5afbc5e..58153ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2350,6 +2350,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, INIT_KFIFO(vm->faults); vm->fault_credit = 16; + vm->vm_context = vm_context; >>> I think we should drop the vm_context parameter and all the related >>> code in amdgpu_vm_init(). But that can be part of a later cleanup >>> patch as well. >> Yep. It will be a prerequisite for sharing VMs between graphics and >> compute. But then CPU vs SDMA page table update would need to be handled >> differently, on a per-call basis and probably some synchronization >> between them. >> >> Also, we'd need to fix shadow page table handling with CPU updates, or >> get rid of shadow page tables. I'm not sure why they're needed, TBH. > > The idea behind shadow page tables is that you have the current state > of the pipeline if the GPU crashes. Since with CPU updates there is no > pipeline they doesn't make sense at all with them. > >> + return 0; error_free_root: @@ -2364,6 +2366,86 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } /** + * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM + * + * This only works on GFX VMs that don't have any BOs added and no + * page tables allocated yet. 
+ * + * Changes the following VM parameters: + * - vm_context + * - use_cpu_for_update + * - pte_supports_ats + * - pasid (old PASID is released, because compute manages its own PASIDs) + * + * Reinitializes the page directory to reflect the changed ATS + * setting. May leave behind an unused shadow BO for the page + * directory when switching from SDMA updates to CPU updates. + * + * Returns 0 for success, -errno for errors. + */ +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); + int r; + + r = amdgpu_bo_reserve(vm->root.base.bo, true); + if (r) + return r; + + /* Sanity checks */ + if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { + /* Can happen if ioctl is interrupted by a signal after + * this function already completed. Just return success. + */ + r = 0; + goto error; + } >>> Ok, that is actually a show stopper. An interrupted IOCTL should never >>> have a visible effect. >>> >>> Is that just a theoretical issue or did you run into this in practice? >> It's theoretical. init_kfd_vm in patch 4 reserves the page directory >> again, validates it and waits for the validation to complete. Both the >> reservation and the waiting can be interrupted by signals. > > No, both have parameters to control if they are interruptible or not > for exactly this reason. > >> This happens >> after amdgpu_vm_make_compute has completed. >> >> If we wanted to achieve "no visible effect", we'd need to roll back the >> conversion to a compute VM, which may involve more waiting. >> >> We have a similar issue in map_memory_to_gpu. We map on all GPUs and >> wait for a sync object in the end. If SDMA is used, this allows the SDMA >> on all GPUs to work concurrently. The wait in the end can be >> interrupted. Instead of rolling back the mapping on all GPUs (which >> would require more waiting), we just let the system call return >> -ERESTARTSYS and make sure the restart doesn't cause any trouble. 
> > If waits are interruptible or not is controllable by parameters. At > least for importing the VM it is probably ok to disable this. Sure, I can change that. > >> I've been looking for documentation about the expected behaviour for >> system calls interrupted by signals. All I can find is this statement in >> Documentation/kernel-hacking/hacking.rst: >>> After you slept you should check if a signal occurred: the Unix/Linux >>> way of handling signals is to temporarily exit the system call with the >>> ``-ERESTARTSYS`` error. The system call entry code will switch back to >>> user context, process the signal handler and then your system call will >>> be restarted (unless the user disabled that). So you should be prepared >>> to
Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM
Am 05.03.2018 um 17:58 schrieb Felix Kuehling: On 2018-03-05 07:17 AM, Christian König wrote: Am 04.03.2018 um 03:34 schrieb Felix Kuehling: Signed-off-by: Felix Kuehling--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + 2 files changed, 83 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5afbc5e..58153ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2350,6 +2350,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, INIT_KFIFO(vm->faults); vm->fault_credit = 16; + vm->vm_context = vm_context; I think we should drop the vm_context parameter and all the related code in amdgpu_vm_init(). But that can be part of a later cleanup patch as well. Yep. It will be a prerequisite for sharing VMs between graphics and compute. But then CPU vs SDMA page table update would need to be handled differently, on a per-call basis and probably some synchronization between them. Also, we'd need to fix shadow page table handling with CPU updates, or get rid of shadow page tables. I'm not sure why they're needed, TBH. The idea behind shadow page tables is that you have the current state of the pipeline if the GPU crashes. Since with CPU updates there is no pipeline they doesn't make sense at all with them. + return 0; error_free_root: @@ -2364,6 +2366,86 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } /** + * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM + * + * This only works on GFX VMs that don't have any BOs added and no + * page tables allocated yet. + * + * Changes the following VM parameters: + * - vm_context + * - use_cpu_for_update + * - pte_supports_ats + * - pasid (old PASID is released, because compute manages its own PASIDs) + * + * Reinitializes the page directory to reflect the changed ATS + * setting. 
May leave behind an unused shadow BO for the page + * directory when switching from SDMA updates to CPU updates. + * + * Returns 0 for success, -errno for errors. + */ +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); + int r; + + r = amdgpu_bo_reserve(vm->root.base.bo, true); + if (r) + return r; + + /* Sanity checks */ + if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { + /* Can happen if ioctl is interrupted by a signal after + * this function already completed. Just return success. + */ + r = 0; + goto error; + } Ok, that is actually a show stopper. An interrupted IOCTL should never have a visible effect. Is that just a theoretical issue or did you run into this in practice? It's theoretical. init_kfd_vm in patch 4 reserves the page directory again, validates it and waits for the validation to complete. Both the reservation and the waiting can be interrupted by signals. No, both have parameters to control if they are interruptible or not for exactly this reason. This happens after amdgpu_vm_make_compute has completed. If we wanted to achieve "no visible effect", we'd need to roll back the conversion to a compute VM, which may involve more waiting. We have a similar issue in map_memory_to_gpu. We map on all GPUs and wait for a sync object in the end. If SDMA is used, this allows the SDMA on all GPUs to work concurrently. The wait in the end can be interrupted. Instead of rolling back the mapping on all GPUs (which would require more waiting), we just let the system call return -ERESTARTSYS and make sure the restart doesn't cause any trouble. If waits are interruptible or not is controllable by parameters. At least for importing the VM it is probably ok to disable this. I've been looking for documentation about the expected behaviour for system calls interrupted by signals. 
All I can find is this statement in Documentation/kernel-hacking/hacking.rst: After you slept you should check if a signal occurred: the Unix/Linux way of handling signals is to temporarily exit the system call with the ``-ERESTARTSYS`` error. The system call entry code will switch back to user context, process the signal handler and then your system call will be restarted (unless the user disabled that). So you should be prepared to process the restart, e.g. if you're in the middle of manipulating some data structure. The same paragraph is also copied in https://www.kernel.org/doc/htmldocs/kernel-hacking/ioctls.html. The key sentence is "should be prepared to process the restart, e.g. if you're in the middle of manipulating some data structure". Rolling back everything would achieve that, but it's not the only way and not the most efficient way. In this case, I'm handling the restart by checking whether the VM is already a compute
Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM
On 2018-03-05 07:17 AM, Christian König wrote: > Am 04.03.2018 um 03:34 schrieb Felix Kuehling: >> Signed-off-by: Felix Kuehling>> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82 >> ++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + >> 2 files changed, 83 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index 5afbc5e..58153ef 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -2350,6 +2350,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, >> struct amdgpu_vm *vm, >> INIT_KFIFO(vm->faults); >> vm->fault_credit = 16; >> + vm->vm_context = vm_context; > > I think we should drop the vm_context parameter and all the related > code in amdgpu_vm_init(). But that can be part of a later cleanup > patch as well. Yep. It will be a prerequisite for sharing VMs between graphics and compute. But then CPU vs SDMA page table update would need to be handled differently, on a per-call basis and probably some synchronization between them. Also, we'd need to fix shadow page table handling with CPU updates, or get rid of shadow page tables. I'm not sure why they're needed, TBH. > >> + >> return 0; >> error_free_root: >> @@ -2364,6 +2366,86 @@ int amdgpu_vm_init(struct amdgpu_device *adev, >> struct amdgpu_vm *vm, >> } >> /** >> + * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM >> + * >> + * This only works on GFX VMs that don't have any BOs added and no >> + * page tables allocated yet. >> + * >> + * Changes the following VM parameters: >> + * - vm_context >> + * - use_cpu_for_update >> + * - pte_supports_ats >> + * - pasid (old PASID is released, because compute manages its own >> PASIDs) >> + * >> + * Reinitializes the page directory to reflect the changed ATS >> + * setting. May leave behind an unused shadow BO for the page >> + * directory when switching from SDMA updates to CPU updates. 
>> + * >> + * Returns 0 for success, -errno for errors. >> + */ >> +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct >> amdgpu_vm *vm) >> +{ >> + bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); >> + int r; >> + >> + r = amdgpu_bo_reserve(vm->root.base.bo, true); >> + if (r) >> + return r; >> + >> + /* Sanity checks */ >> + if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { >> + /* Can happen if ioctl is interrupted by a signal after >> + * this function already completed. Just return success. >> + */ >> + r = 0; >> + goto error; >> + } > > Ok, that is actually a show stopper. An interrupted IOCTL should never > have a visible effect. > > Is that just a theoretical issue or did you run into this in practice? It's theoretical. init_kfd_vm in patch 4 reserves the page directory again, validates it and waits for the validation to complete. Both the reservation and the waiting can be interrupted by signals. This happens after amdgpu_vm_make_compute has completed. If we wanted to achieve "no visible effect", we'd need to roll back the conversion to a compute VM, which may involve more waiting. We have a similar issue in map_memory_to_gpu. We map on all GPUs and wait for a sync object in the end. If SDMA is used, this allows the SDMA on all GPUs to work concurrently. The wait in the end can be interrupted. Instead of rolling back the mapping on all GPUs (which would require more waiting), we just let the system call return -ERESTARTSYS and make sure the restart doesn't cause any trouble. I've been looking for documentation about the expected behaviour for system calls interrupted by signals. All I can find is this statement in Documentation/kernel-hacking/hacking.rst: > After you slept you should check if a signal occurred: the Unix/Linux > way of handling signals is to temporarily exit the system call with the > ``-ERESTARTSYS`` error. 
The system call entry code will switch back to > user context, process the signal handler and then your system call will > be restarted (unless the user disabled that). So you should be prepared > to process the restart, e.g. if you're in the middle of manipulating > some data structure. The same paragraph is also copied in https://www.kernel.org/doc/htmldocs/kernel-hacking/ioctls.html. The key sentence is "should be prepared to process the restart, e.g. if you're in the middle of manipulating some data structure". Rolling back everything would achieve that, but it's not the only way and not the most efficient way. In this case, I'm handling the restart by checking whether the VM is already a compute VM. So on the restart the conversion to a compute VM becomes a no-op. Similarly, in the map_memory_to_gpu ioctl, mapping on a GPU where the memory is already mapped becomes a no-op, which handles the restart case. In this case we even added a specific test for this condition in
Re:
Michel Dänzer writes: > On 2018-03-03 05:49 AM, Keith Packard wrote: >> Here are the patches to the modesetting driver amended for the amdgpu >> driver. > > Thanks for the patches. Unfortunately, since this driver still has to > compile and work with xserver >= 1.13, at least patches 1 & 3 cannot be > applied as is. > > I was going to port these and take care of that anyway, though I might > not get around to it before April. If it can't wait that long, I can > give you details about what needs to be done. I'm good with that -- I needed this to test amdgpu vs modesetting for some applications, and just having the patches with support is good enough for me. -- -keith signature.asc Description: PGP signature ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amdgpu: Enable scatter gather display support
There are three major options when SG capability is enabled, 1) Allocate everything when possible from GTT memory 2) Allocate everything when possible from VRAM 3) Allow both VRAM/GTT to be available Each has its own pros/cons, and it has not been decided which direction is going to be used for all PCs. So it seems beneficial to provide all the options. Sam On 2018-03-05 05:17 AM, Michel Dänzer wrote: > On 2018-03-03 12:25 AM, Samuel Li wrote: >> It's enabled by default. -1 is auto, to allow both vram and gtt >> memory be available, for testing purpose only. > > Do we really need a module parameter for this? There's already too many > of them. The driver should know which to use in which cases, and > developers can easily tweak the code. Users should never need to change > this. > > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu: Expose scatter gather display to user space.
>> +if (amdgpu_display_framebuffer_domains(adev) == >> AMDGPU_GEM_DOMAIN_GTT) > > This check should probably be & instead of ==. Thought about that as well. For mixed VRAM/GTT display case, since it has some complications, I prefer to do it later. Sam On 2018-03-03 08:42 AM, Christian König wrote: > Am 03.03.2018 um 00:26 schrieb Samuel Li: >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++ >> include/uapi/drm/amdgpu_drm.h | 1 + >> 2 files changed, 4 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >> index 51a4b08..2cb344f 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >> @@ -31,6 +31,7 @@ >> #include "amdgpu_sched.h" >> #include "amdgpu_uvd.h" >> #include "amdgpu_vce.h" >> +#include "amdgpu_display.h" >> #include >> #include >> @@ -578,6 +579,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, >> void *data, struct drm_file >> dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION; >> if (amdgpu_sriov_vf(adev)) >> dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; >> + if (amdgpu_display_framebuffer_domains(adev) == >> AMDGPU_GEM_DOMAIN_GTT) > > This check should probably be & instead of ==. > > Christian. > >> + dev_info.ids_flags |= AMDGPU_IDS_FLAGS_SGDISPLAY; >> vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; >> vm_size -= AMDGPU_VA_RESERVED_SIZE; >> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h >> index fe17b67..2f30b98 100644 >> --- a/include/uapi/drm/amdgpu_drm.h >> +++ b/include/uapi/drm/amdgpu_drm.h >> @@ -584,6 +584,7 @@ struct drm_amdgpu_cs_chunk_data { >> */ >> #define AMDGPU_IDS_FLAGS_FUSION 0x1 >> #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 >> +#define AMDGPU_IDS_FLAGS_SGDISPLAY 0x4 >> /* indicate if acceleration can be working */ >> #define AMDGPU_INFO_ACCEL_WORKING 0x00 > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/3] drm/amd/pp: Delete the wrap layer of smu_allocate/free_memory
will remove repeated assignment to mc_addr in verion 2. Best Regards Rex From: amd-gfxon behalf of Rex Zhu Sent: Monday, March 5, 2018 6:43 PM To: amd-gfx@lists.freedesktop.org Cc: Zhu, Rex Subject: [PATCH 2/3] drm/amd/pp: Delete the wrap layer of smu_allocate/free_memory use amdgpu_bo_create/free_kernel directly. Change-Id: I74f20353edd4e0df6328db66914cd9eabb60e1d7 Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/powerplay/inc/smumgr.h | 7 - drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c | 39 +++--- drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.h | 11 +- drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c | 53 +++ drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h | 11 +- drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c | 52 +++ drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h | 11 +- drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c | 51 --- .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 155 ++--- .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h | 11 +- 10 files changed, 176 insertions(+), 225 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index e1f6e83..8872c5c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -106,13 +106,6 @@ enum SMU_MAC_DEFINITION { extern int smum_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter); -extern int smu_allocate_memory(void *device, uint32_t size, -enum cgs_gpu_mem_type type, -uint32_t byte_align, uint64_t *mc_addr, -void **kptr, void *handle); - -extern int smu_free_memory(void *device, void *handle); - extern int smum_update_sclk_threshold(struct pp_hwmgr *hwmgr); extern int smum_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c index 7fe4c11..0d2892d 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c +++ 
b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c @@ -750,7 +750,6 @@ static int cz_start_smu(struct pp_hwmgr *hwmgr) static int cz_smu_init(struct pp_hwmgr *hwmgr) { - uint64_t mc_addr = 0; int ret = 0; struct cz_smumgr *cz_smu; @@ -768,31 +767,31 @@ static int cz_smu_init(struct pp_hwmgr *hwmgr) ALIGN(sizeof(struct SMU8_MultimediaPowerLogData), 32) + ALIGN(sizeof(struct SMU8_Fusion_ClkTable), 32); - ret = smu_allocate_memory(hwmgr->device, + ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, cz_smu->toc_buffer.data_size, - CGS_GPU_MEM_TYPE__GART_CACHEABLE, PAGE_SIZE, - _addr, - _smu->toc_buffer.kaddr, - _smu->toc_buffer.handle); + AMDGPU_GEM_DOMAIN_VRAM, + _smu->toc_buffer.handle, + _smu->toc_buffer.mc_addr, + _smu->toc_buffer.kaddr); if (ret != 0) return -1; - cz_smu->toc_buffer.mc_addr_high = smu_upper_32_bits(mc_addr); - cz_smu->toc_buffer.mc_addr_low = smu_lower_32_bits(mc_addr); + cz_smu->toc_buffer.mc_addr_high = smu_upper_32_bits(cz_smu->toc_buffer.mc_addr); + cz_smu->toc_buffer.mc_addr_low = smu_lower_32_bits(cz_smu->toc_buffer.mc_addr); - ret = smu_allocate_memory(hwmgr->device, + ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, cz_smu->smu_buffer.data_size, - CGS_GPU_MEM_TYPE__GART_CACHEABLE, PAGE_SIZE, - _addr, - _smu->smu_buffer.kaddr, - _smu->smu_buffer.handle); + AMDGPU_GEM_DOMAIN_VRAM, + _smu->smu_buffer.handle, + _smu->toc_buffer.mc_addr, + _smu->smu_buffer.kaddr); if (ret != 0) return -1; - cz_smu->smu_buffer.mc_addr_high = smu_upper_32_bits(mc_addr); - cz_smu->smu_buffer.mc_addr_low = smu_lower_32_bits(mc_addr); + cz_smu->smu_buffer.mc_addr_high = smu_upper_32_bits(cz_smu->toc_buffer.mc_addr); + cz_smu->smu_buffer.mc_addr_low = smu_lower_32_bits(cz_smu->toc_buffer.mc_addr); if (0 != cz_smu_populate_single_scratch_entry(hwmgr, CZ_SCRATCH_ENTRY_UCODE_ID_RLC_SCRATCH, @@ -845,10 +844,12 @@ static int
Re: [PATCH 03/14] drm/amdgpu: Add helper to turn an existing VM into a compute VM
Am 04.03.2018 um 03:34 schrieb Felix Kuehling: Signed-off-by: Felix Kuehling--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + 2 files changed, 83 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5afbc5e..58153ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2350,6 +2350,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, INIT_KFIFO(vm->faults); vm->fault_credit = 16; + vm->vm_context = vm_context; I think we should drop the vm_context parameter and all the related code in amdgpu_vm_init(). But that can be part of a later cleanup patch as well. + return 0; error_free_root: @@ -2364,6 +2366,86 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } /** + * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM + * + * This only works on GFX VMs that don't have any BOs added and no + * page tables allocated yet. + * + * Changes the following VM parameters: + * - vm_context + * - use_cpu_for_update + * - pte_supports_ats + * - pasid (old PASID is released, because compute manages its own PASIDs) + * + * Reinitializes the page directory to reflect the changed ATS + * setting. May leave behind an unused shadow BO for the page + * directory when switching from SDMA updates to CPU updates. + * + * Returns 0 for success, -errno for errors. + */ +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); + int r; + + r = amdgpu_bo_reserve(vm->root.base.bo, true); + if (r) + return r; + + /* Sanity checks */ + if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { + /* Can happen if ioctl is interrupted by a signal after +* this function already completed. Just return success. +*/ + r = 0; + goto error; + } Ok, that is actually a show stopper. An interrupted IOCTL should never have a visible effect. 
Is that just a theoretical issue or did you run into this in practice? Regards, Christian. + if (!RB_EMPTY_ROOT(>va.rb_root) || vm->root.entries) { + r = -EINVAL; + goto error; + } + + /* Check if PD needs to be reinitialized and do it before +* changing any other state, in case it fails. +*/ + if (pte_support_ats != vm->pte_support_ats) { + /* TODO: This depends on a patch series by Christian. +* It's only needed for GFX9 GPUs, which aren't +* supported by upstream KFD yet. + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, + adev->vm_manager.root_level, + pte_support_ats); + if (r) + goto error; + */ + } + + /* Update VM state */ + vm->vm_context = AMDGPU_VM_CONTEXT_COMPUTE; + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_COMPUTE); + vm->pte_support_ats = pte_support_ats; + DRM_DEBUG_DRIVER("VM update mode is %s\n", +vm->use_cpu_for_update ? "CPU" : "SDMA"); + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + "CPU update of VM recommended only for large BAR system\n"); + + if (vm->pasid) { + unsigned long flags; + + spin_lock_irqsave(>vm_manager.pasid_lock, flags); + idr_remove(>vm_manager.pasid_idr, vm->pasid); + spin_unlock_irqrestore(>vm_manager.pasid_lock, flags); + + vm->pasid = 0; + } + +error: + amdgpu_bo_unreserve(vm->root.base.bo); + return r; +} + +/** * amdgpu_vm_free_levels - free PD/PT levels * * @adev: amdgpu device structure diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 40b4e09..7f50a38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -263,6 +263,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int vm_context, unsigned int pasid); +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device 
*adev, struct amdgpu_vm *vm); bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, unsigned int pasid); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org
Re: [PATCH 4/5] drm/ttm: add ttm_sg_tt_init
Ping? Am 27.02.2018 um 13:07 schrieb Christian König: Hi guys, at least on amdgpu and radeon the page array allocated by ttm_dma_tt_init is completely unused in the case of DMA-buf sharing. So I'm trying to get rid of that by only allocating the DMA address array. Now the only other user of DMA-buf together with ttm_dma_tt_init is Nouveau. So my question is are you guys using the page array anywhere in your kernel driver in case of a DMA-buf sharing? If no then I could just make this the default behavior for all drivers and save quite a bit of memory for everybody. Thanks, Christian. Am 27.02.2018 um 12:49 schrieb Christian König: This allows drivers to only allocate dma addresses, but not a page array. Signed-off-by: Christian König--- drivers/gpu/drm/ttm/ttm_tt.c | 54 include/drm/ttm/ttm_tt.h | 2 ++ 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 8e0b525cda00..971133106ec2 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -108,6 +108,16 @@ static int ttm_dma_tt_alloc_page_directory(struct ttm_dma_tt *ttm) return 0; } +static int ttm_sg_tt_alloc_page_directory(struct ttm_dma_tt *ttm) +{ + ttm->dma_address = kvmalloc_array(ttm->ttm.num_pages, + sizeof(*ttm->dma_address), + GFP_KERNEL | __GFP_ZERO); + if (!ttm->dma_address) + return -ENOMEM; + return 0; +} + #ifdef CONFIG_X86 static inline int ttm_tt_set_page_caching(struct page *p, enum ttm_caching_state c_old, @@ -227,8 +237,8 @@ void ttm_tt_destroy(struct ttm_tt *ttm) ttm->func->destroy(ttm); } -int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev, - unsigned long size, uint32_t page_flags) +void ttm_tt_init_fields(struct ttm_tt *ttm, struct ttm_bo_device *bdev, + unsigned long size, uint32_t page_flags) { ttm->bdev = bdev; ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -236,6 +246,12 @@ int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev, ttm->page_flags = page_flags; 
ttm->state = tt_unpopulated; ttm->swap_storage = NULL; +} + +int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev, + unsigned long size, uint32_t page_flags) +{ + ttm_tt_init_fields(ttm, bdev, size, page_flags); if (ttm_tt_alloc_page_directory(ttm)) { ttm_tt_destroy(ttm); @@ -258,12 +274,7 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device *bdev, { struct ttm_tt *ttm = _dma->ttm; - ttm->bdev = bdev; - ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - ttm->caching_state = tt_cached; - ttm->page_flags = page_flags; - ttm->state = tt_unpopulated; - ttm->swap_storage = NULL; + ttm_tt_init_fields(ttm, bdev, size, page_flags); INIT_LIST_HEAD(_dma->pages_list); if (ttm_dma_tt_alloc_page_directory(ttm_dma)) { @@ -275,11 +286,36 @@ int ttm_dma_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device *bdev, } EXPORT_SYMBOL(ttm_dma_tt_init); +int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device *bdev, + unsigned long size, uint32_t page_flags) +{ + struct ttm_tt *ttm = _dma->ttm; + int ret; + + ttm_tt_init_fields(ttm, bdev, size, page_flags); + + INIT_LIST_HEAD(_dma->pages_list); + if (page_flags & TTM_PAGE_FLAG_SG) + ret = ttm_sg_tt_alloc_page_directory(ttm_dma); + else + ret = ttm_dma_tt_alloc_page_directory(ttm_dma); + if (ret) { + ttm_tt_destroy(ttm); + pr_err("Failed allocating page table\n"); + return -ENOMEM; + } + return 0; +} +EXPORT_SYMBOL(ttm_sg_tt_init); + void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma) { struct ttm_tt *ttm = _dma->ttm; - kvfree(ttm->pages); + if (ttm->pages) + kvfree(ttm->pages); + else + kvfree(ttm_dma->dma_address); ttm->pages = NULL; ttm_dma->dma_address = NULL; } diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 9c78556b488e..1cf316a4257c 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -163,6 +163,8 @@ int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev, unsigned long size, uint32_t page_flags); int ttm_dma_tt_init(struct ttm_dma_tt 
*ttm_dma, struct ttm_bo_device *bdev, unsigned long size, uint32_t page_flags); +int ttm_sg_tt_init(struct ttm_dma_tt *ttm_dma, struct ttm_bo_device *bdev, + unsigned long size, uint32_t page_flags); /** * ttm_tt_fini ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/5] drm/prime: fix potential race in drm_gem_map_detach
Ping? Anybody who could give me an review on those changes? The first one is just nice to have, the rest is a nice memory usage reduction in some cases. Christian. Am 27.02.2018 um 12:49 schrieb Christian König: Unpin the GEM object only after freeing the sg table. Signed-off-by: Christian König--- drivers/gpu/drm/drm_prime.c | 32 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index e82a976f0fba..c38dacda6119 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -230,26 +230,26 @@ void drm_gem_map_detach(struct dma_buf *dma_buf, struct drm_prime_attachment *prime_attach = attach->priv; struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; - struct sg_table *sgt; - if (dev->driver->gem_prime_unpin) - dev->driver->gem_prime_unpin(obj); + if (prime_attach) { + struct sg_table *sgt = prime_attach->sgt; - if (!prime_attach) - return; - - sgt = prime_attach->sgt; - if (sgt) { - if (prime_attach->dir != DMA_NONE) - dma_unmap_sg_attrs(attach->dev, sgt->sgl, sgt->nents, - prime_attach->dir, - DMA_ATTR_SKIP_CPU_SYNC); - sg_free_table(sgt); + if (sgt) { + if (prime_attach->dir != DMA_NONE) + dma_unmap_sg_attrs(attach->dev, sgt->sgl, + sgt->nents, + prime_attach->dir, + DMA_ATTR_SKIP_CPU_SYNC); + sg_free_table(sgt); + } + + kfree(sgt); + kfree(prime_attach); + attach->priv = NULL; } - kfree(sgt); - kfree(prime_attach); - attach->priv = NULL; + if (dev->driver->gem_prime_unpin) + dev->driver->gem_prime_unpin(obj); } EXPORT_SYMBOL(drm_gem_map_detach); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/3] drm/amdgpu: add amdgpu_evict_gtt debugfs entry
Allow evicting all BOs from the GTT domain. Signed-off-by: Christian König--- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index ee76b468774a..369beb5041a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -767,10 +767,21 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data) return 0; } +static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + + seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT)); + return 0; +} + static const struct drm_info_list amdgpu_debugfs_list[] = { {"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump}, {"amdgpu_test_ib", &amdgpu_debugfs_test_ib}, - {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram} + {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram}, + {"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt}, }; int amdgpu_debugfs_init(struct amdgpu_device *adev) -- 2.14.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/3] drm/amdgpu: further mitigate workaround for i915
Disable the workaround on imported BOs as well. Signed-off-by: Christian König--- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 8ce74a1d9966..fb66b45548d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -107,12 +107,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, ww_mutex_lock(&resv->lock, NULL); ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false, AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, &bo); - ww_mutex_unlock(&resv->lock); if (ret) - return ERR_PTR(ret); + goto error; + + if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) + bo->prime_shared_count = 1; - bo->prime_shared_count = 1; + ww_mutex_unlock(&resv->lock); return &bo->gem_base; + +error: + ww_mutex_unlock(&resv->lock); + return ERR_PTR(ret); } static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, -- 2.14.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/3] drm/amdgpu: drop gtt->adev
We can use ttm->bdev instead. Signed-off-by: Christian König--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 614811061d3d..b19a4f7a845e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -679,7 +679,6 @@ struct amdgpu_ttm_gup_task_list { struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; - struct amdgpu_device*adev; u64 offset; uint64_tuserptr; struct mm_struct*usermm; @@ -837,6 +836,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void*)ttm; uint64_t flags; int r = 0; @@ -863,9 +863,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, return 0; } - flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); + flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; - r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, + r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, ttm->pages, gtt->ttm.dma_address, flags); if (r) @@ -942,6 +942,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; int r; @@ -952,7 +953,7 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) return 0; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ - r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); + r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages); if (r) DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", gtt->ttm.ttm.num_pages, gtt->offset); @@ -986,7 +987,6 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev, return 
NULL; } gtt->ttm.ttm.func = _backend_func; - gtt->adev = adev; if (ttm_dma_tt_init(>ttm, bdev, size, page_flags)) { kfree(gtt); return NULL; -- 2.14.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] dma-buf/reservation: should keep the new fence in add_shared_inplace
Change-Id: If6a979ba9fd6c923b82212f35f07a9ff31c86767 Signed-off-by: Monk Liu--- drivers/dma-buf/reservation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 375de41..9b875267 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -118,7 +118,7 @@ reservation_object_add_shared_inplace(struct reservation_object *obj, old_fence = rcu_dereference_protected(fobj->shared[i], reservation_object_held(obj)); - if (old_fence->context == fence->context) { + if (dma_fence_is_later(fence, old_fence)) { /* memory barrier is added by write_seqcount_begin */ RCU_INIT_POINTER(fobj->shared[i], fence); write_seqcount_end(&obj->seq); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)
Hi Monk, then that was written differently. Maybe "might_sleep()" or something like that? It's just a checker which raises a nice warning when you try to sleep in atomic context on debug kernels. Christian. Am 05.03.2018 um 12:27 schrieb Liu, Monk: Hi Christian I couln't find this "may_sleep()" in my 4.13kernel , did I miss something ?? Thanks /Monk -Original Message- From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] Sent: 2018年3月5日 19:21 To: Liu, Monk; Koenig, Christian ; Kuehling, Felix ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2) Am 05.03.2018 um 09:08 schrieb Liu, Monk: To better approach this issue I suggest to do the following: 1. Revert the original patch. 2. Stop waiting to long for writes. E.g. use a separate timeout (20ms maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout. Cannot do that 20ms is not enough, sometimes you need 10 seconds since other VFs may doing bad things like occupying GPU intentionally or they are doing TDR, so I don't think separate read and write is good idea, they should be treated equally Well the question is if separating read would actually help. 3. To the read function add a "if (!in_intterupt()) may_sleep();" and then retest. That should at least print a nice warning when called from atomic context. Sorry what is may_sleep() ?? 4. Test the whole thing and try to fix all warnings about atomic contexts from the may_sleep(); 5. Reapply the original patch, but this time only for the read function, not the write function. 
From current LKG code, the only one spin lock may wrapping the kiq_rreg/wreg() is the pcie_idx_lock, and this lock is only used during init(), Since init() is run under the case of exclusive mode for SRIOV, which means: 1) register access is not go through KIQ (see admgpu_mm_reg) 2) those functions are only in bif_medium_grain_xxx part (vi.c and nbio_v6.c) , and they won't hit under SRIOV ( we return in the head if SRIOV detect) So I don' think this spin_lock may cause trouble... Ok in this case let's keep the patch for now, but please provide a new patch which adds "if (!in_intterupt()) may_sleep();" in both the read and write function. This way we should at least catch problems early on. Christian. /Monk -Original Message- From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] Sent: 2018年3月5日 15:57 To: Liu, Monk ; Koenig, Christian ; Kuehling, Felix ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2) otherwise I don't see how it is better by reverting it Well it's better to revert it for now because it seems to create more problems than it solves. To better approach this issue I suggest to do the following: 1. Revert the original patch. 2. Stop waiting to long for writes. E.g. use a separate timeout (20ms maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout. 3. To the read function add a "if (!in_intterupt()) may_sleep();" and then retest. That should at least print a nice warning when called from atomic context. 4. Test the whole thing and try to fix all warnings about atomic contexts from the may_sleep(); 5. Reapply the original patch, but this time only for the read function, not the write function. Regards, Christian. 
Am 05.03.2018 um 05:20 schrieb Liu, Monk: When there are 16 VF/VM on one GPU, we can easily hit "sys lockup to 22s" kernel error/warning introduced by kiq_rreg/wreg routine That's why I must use this patch to let thread sleep a while and try again, If you insist reverting this patch please give me a solution, otherwise I don't see how it is better by reverting it /Monk -Original Message- From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] Sent: 2018年3月3日 21:38 To: Kuehling, Felix ; Liu, Monk ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2) Am 02.03.2018 um 21:47 schrieb Felix Kuehling: On 2018-03-02 04:29 AM, Liu, Monk wrote: In_atomic() isnot encouraged to be used to judge if sleep is possible, see the macros of it #define in_atomic() (preept_count() != 0) OK. But my point is still that you're not testing the right thing when you check in_interrupt(). The comment before the in_atomic macro definition states the limitations and says "do not use in driver code". Unfortunately it doesn't suggest any alternative. I think in_interrupt is actually worse, because it misses even more cases than in_atomic. Thinking about this, Felix seems to be absolutely right. So we need to revert this patch since you can't reliable detect in a driver if sleeping is allowed or not. Regards, Christian. Regards, Felix /Monk -Original Message-
RE: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)
Hi Christian I couln't find this "may_sleep()" in my 4.13kernel , did I miss something ?? Thanks /Monk -Original Message- From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] Sent: 2018年3月5日 19:21 To: Liu, Monk; Koenig, Christian ; Kuehling, Felix ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2) Am 05.03.2018 um 09:08 schrieb Liu, Monk: > To better approach this issue I suggest to do the following: > 1. Revert the original patch. > > 2. Stop waiting to long for writes. E.g. use a separate timeout (20ms > maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout. > Cannot do that 20ms is not enough, sometimes you need 10 seconds since > other VFs may doing bad things like occupying GPU intentionally or > they are doing TDR, so I don't think separate read and write is good > idea, they should be treated equally Well the question is if separating read would actually help. > > 3. To the read function add a "if (!in_intterupt()) may_sleep();" and then > retest. That should at least print a nice warning when called from atomic > context. > Sorry what is may_sleep() ?? > > 4. Test the whole thing and try to fix all warnings about atomic > contexts from the may_sleep(); > > 5. Reapply the original patch, but this time only for the read function, not > the write function. > > > From current LKG code, the only one spin lock may wrapping the > kiq_rreg/wreg() is the pcie_idx_lock, and this lock is only used during > init(), Since init() is run under the case of exclusive mode for SRIOV, which > means: > 1) register access is not go through KIQ (see admgpu_mm_reg) > 2) those functions are only in bif_medium_grain_xxx part (vi.c and > nbio_v6.c) , and they won't hit under SRIOV ( we return in the head if SRIOV > detect) So I don' think this spin_lock may cause trouble... 
Ok in this case let's keep the patch for now, but please provide a new patch which adds "if (!in_intterupt()) may_sleep();" in both the read and write function. This way we should at least catch problems early on. Christian. > > /Monk > > > > -Original Message- > From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] > Sent: 2018年3月5日 15:57 > To: Liu, Monk ; Koenig, Christian > ; Kuehling, Felix ; > amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in > IRQ(v2) > >> otherwise I don't see how it is better by reverting it > Well it's better to revert it for now because it seems to create more > problems than it solves. > > To better approach this issue I suggest to do the following: > 1. Revert the original patch. > > 2. Stop waiting to long for writes. E.g. use a separate timeout (20ms > maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout. > > 3. To the read function add a "if (!in_intterupt()) may_sleep();" and then > retest. That should at least print a nice warning when called from atomic > context. > > 4. Test the whole thing and try to fix all warnings about atomic > contexts from the may_sleep(); > > 5. Reapply the original patch, but this time only for the read function, not > the write function. > > Regards, > Christian. 
> > Am 05.03.2018 um 05:20 schrieb Liu, Monk: >> When there are 16 VF/VM on one GPU, we can easily hit "sys lockup to >> 22s" kernel error/warning introduced by kiq_rreg/wreg routine That's >> why I must use this patch to let thread sleep a while and try again, >> >> If you insist reverting this patch please give me a solution, >> otherwise I don't see how it is better by reverting it >> >> /Monk >> >> -Original Message- >> From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] >> Sent: 2018年3月3日 21:38 >> To: Kuehling, Felix ; Liu, Monk >> ; amd-gfx@lists.freedesktop.org >> Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in >> IRQ(v2) >> >> Am 02.03.2018 um 21:47 schrieb Felix Kuehling: >>> On 2018-03-02 04:29 AM, Liu, Monk wrote: In_atomic() isnot encouraged to be used to judge if sleep is possible, see the macros of it #define in_atomic() (preept_count() != 0) >>> OK. But my point is still that you're not testing the right thing >>> when you check in_interrupt(). The comment before the in_atomic >>> macro definition states the limitations and says "do not use in driver >>> code". >>> Unfortunately it doesn't suggest any alternative. I think >>> in_interrupt is actually worse, because it misses even more cases than >>> in_atomic. >> Thinking about this, Felix seems to be absolutely right. >> >> So we need to revert this patch since you can't reliable detect in a driver >> if sleeping is allowed or not. >> >> Regards, >> Christian. >> >>> Regards, >>> Felix >>> /Monk
Re: [PATCH 1/3] drm/amd/pp: Remove cgs wrap interface for temperature update
Acked-by: Christian Königfor the series. Am 05.03.2018 um 11:43 schrieb Rex Zhu: Change-Id: I742e82315910ce57aeb4a391fe2ac1cdfccdb8ea --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 13 - drivers/gpu/drm/amd/include/cgs_common.h | 6 -- drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | 4 +++- 3 files changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index f37482c..76f4758 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -905,18 +905,6 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool ena return 0; } -static int amdgpu_cgs_set_temperature_range(struct cgs_device *cgs_device, - int min_temperature, - int max_temperature) -{ - CGS_FUNC_ADEV; - - adev->pm.dpm.thermal.min_temp = min_temperature; - adev->pm.dpm.thermal.max_temp = max_temperature; - - return 0; -} - static const struct cgs_ops amdgpu_cgs_ops = { .alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem, .free_gpu_mem = amdgpu_cgs_free_gpu_mem, @@ -941,7 +929,6 @@ static int amdgpu_cgs_set_temperature_range(struct cgs_device *cgs_device, .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled, .enter_safe_mode = amdgpu_cgs_enter_safe_mode, .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, - .set_temperature_range = amdgpu_cgs_set_temperature_range, }; static const struct cgs_os_ops amdgpu_cgs_os_ops = { diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h index 113ba6f..7f26f20 100644 --- a/drivers/gpu/drm/amd/include/cgs_common.h +++ b/drivers/gpu/drm/amd/include/cgs_common.h @@ -354,9 +354,6 @@ typedef int(*cgs_get_active_displays_info)( typedef void (*cgs_lock_grbm_idx)(struct cgs_device *cgs_device, bool lock); -typedef int (*cgs_set_temperature_range)(struct cgs_device *cgs_device, - int min_temperature, - int max_temperature); struct cgs_ops { /* memory management calls (similar to KFD interface) */ 
cgs_alloc_gpu_mem_t alloc_gpu_mem; @@ -389,7 +386,6 @@ struct cgs_ops { cgs_is_virtualization_enabled_t is_virtualization_enabled; cgs_enter_safe_mode enter_safe_mode; cgs_lock_grbm_idx lock_grbm_idx; - cgs_set_temperature_range set_temperature_range; }; struct cgs_os_ops; /* To be define in OS-specific CGS header */ @@ -465,7 +461,5 @@ struct cgs_device #define cgs_lock_grbm_idx(cgs_device, lock) \ CGS_CALL(lock_grbm_idx, cgs_device, lock) -#define cgs_set_temperature_range(dev, min_temp, max_temp) \ - CGS_CALL(set_temperature_range, dev, min_temp, max_temp) #endif /* _CGS_COMMON_H */ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index f06f8f4..b784131 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -222,6 +222,7 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) { int ret = 0; struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX}; + struct amdgpu_device *adev = hwmgr->adev; if (hwmgr->hwmgr_func->get_thermal_temperature_range) hwmgr->hwmgr_func->get_thermal_temperature_range( @@ -232,7 +233,8 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) && hwmgr->hwmgr_func->start_thermal_controller != NULL) ret = hwmgr->hwmgr_func->start_thermal_controller(hwmgr, ); - cgs_set_temperature_range(hwmgr->device, range.min, range.max); + adev->pm.dpm.thermal.min_temp = range.min; + adev->pm.dpm.thermal.max_temp = range.max; return ret; } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)
Am 05.03.2018 um 09:08 schrieb Liu, Monk: To better approach this issue I suggest to do the following: 1. Revert the original patch. 2. Stop waiting to long for writes. E.g. use a separate timeout (20ms maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout. Cannot do that 20ms is not enough, sometimes you need 10 seconds since other VFs may doing bad things like occupying GPU intentionally or they are doing TDR, so I don't think separate read and write is good idea, they should be treated equally Well the question is if separating read would actually help. 3. To the read function add a "if (!in_intterupt()) may_sleep();" and then retest. That should at least print a nice warning when called from atomic context. Sorry what is may_sleep() ?? 4. Test the whole thing and try to fix all warnings about atomic contexts from the may_sleep(); 5. Reapply the original patch, but this time only for the read function, not the write function. From current LKG code, the only one spin lock may wrapping the kiq_rreg/wreg() is the pcie_idx_lock, and this lock is only used during init(), Since init() is run under the case of exclusive mode for SRIOV, which means: 1) register access is not go through KIQ (see admgpu_mm_reg) 2) those functions are only in bif_medium_grain_xxx part (vi.c and nbio_v6.c) , and they won't hit under SRIOV ( we return in the head if SRIOV detect) So I don' think this spin_lock may cause trouble... Ok in this case let's keep the patch for now, but please provide a new patch which adds "if (!in_intterupt()) may_sleep();" in both the read and write function. This way we should at least catch problems early on. Christian. 
/Monk -Original Message- From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] Sent: 2018年3月5日 15:57 To: Liu, Monk; Koenig, Christian ; Kuehling, Felix ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2) otherwise I don't see how it is better by reverting it Well it's better to revert it for now because it seems to create more problems than it solves. To better approach this issue I suggest to do the following: 1. Revert the original patch. 2. Stop waiting to long for writes. E.g. use a separate timeout (20ms maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout. 3. To the read function add a "if (!in_intterupt()) may_sleep();" and then retest. That should at least print a nice warning when called from atomic context. 4. Test the whole thing and try to fix all warnings about atomic contexts from the may_sleep(); 5. Reapply the original patch, but this time only for the read function, not the write function. Regards, Christian. Am 05.03.2018 um 05:20 schrieb Liu, Monk: When there are 16 VF/VM on one GPU, we can easily hit "sys lockup to 22s" kernel error/warning introduced by kiq_rreg/wreg routine That's why I must use this patch to let thread sleep a while and try again, If you insist reverting this patch please give me a solution, otherwise I don't see how it is better by reverting it /Monk -Original Message- From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] Sent: 2018年3月3日 21:38 To: Kuehling, Felix ; Liu, Monk ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2) Am 02.03.2018 um 21:47 schrieb Felix Kuehling: On 2018-03-02 04:29 AM, Liu, Monk wrote: In_atomic() isnot encouraged to be used to judge if sleep is possible, see the macros of it #define in_atomic() (preept_count() != 0) OK. But my point is still that you're not testing the right thing when you check in_interrupt(). 
The comment before the in_atomic macro definition states the limitations and says "do not use in driver code". Unfortunately it doesn't suggest any alternative. I think in_interrupt is actually worse, because it misses even more cases than in_atomic. Thinking about this, Felix seems to be absolutely right. So we need to revert this patch since you can't reliable detect in a driver if sleeping is allowed or not. Regards, Christian. Regards, Felix /Monk -Original Message- From: Kuehling, Felix Sent: 2018年3月1日 23:50 To: amd-gfx@lists.freedesktop.org; Liu, Monk Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2) On 2018-02-28 02:27 AM, Monk Liu wrote: sometimes GPU is switched to other VFs and won't swich back soon, so the kiq reg access will not signal within a short period, instead of busy waiting a long time(MAX_KEQ_REG_WAIT) and returning TMO we can istead sleep 5ms and try again later (non irq context) And since the waiting in kiq_r/weg is busy wait, so MAX_KIQ_REG_WAIT shouldn't set to a long time, set it to 10ms is more appropriate. if gpu already in reset state, don't retry the KIQ reg access otherwise
[PATCH 3/3] drm/amdgpu: Delete cgs interface for gpu memory allocate/free
Change-Id: I9085b42b3ffac925aebd8000760d456a4f8ec03c Signed-off-by: Rex Zhu--- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 131 --- drivers/gpu/drm/amd/include/cgs_common.h | 115 --- 2 files changed, 246 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 76f4758..f2dd98d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -42,131 +42,6 @@ struct amdgpu_cgs_device { ((struct amdgpu_cgs_device *)cgs_device)->adev -static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, - enum cgs_gpu_mem_type type, - uint64_t size, uint64_t align, - cgs_handle_t *handle) -{ - CGS_FUNC_ADEV; - uint16_t flags = 0; - int ret = 0; - uint32_t domain = 0; - struct amdgpu_bo *obj; - - /* fail if the alignment is not a power of 2 */ - if (((align != 1) && (align & (align - 1))) - || size == 0 || align == 0) - return -EINVAL; - - - switch(type) { - case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__VISIBLE_FB: - flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - domain = AMDGPU_GEM_DOMAIN_VRAM; - break; - case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__INVISIBLE_FB: - flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - domain = AMDGPU_GEM_DOMAIN_VRAM; - break; - case CGS_GPU_MEM_TYPE__GART_CACHEABLE: - domain = AMDGPU_GEM_DOMAIN_GTT; - break; - case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE: - flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; - domain = AMDGPU_GEM_DOMAIN_GTT; - break; - default: - return -EINVAL; - } - - - *handle = 0; - - ret = amdgpu_bo_create(adev, size, align, true, domain, flags, - NULL, NULL, ); - if (ret) { - DRM_ERROR("(%d) bo create failed\n", ret); - return ret; - } - *handle = (cgs_handle_t)obj; - - return ret; -} - -static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) -{ - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - - 
if (obj) { - int r = amdgpu_bo_reserve(obj, true); - if (likely(r == 0)) { - amdgpu_bo_kunmap(obj); - amdgpu_bo_unpin(obj); - amdgpu_bo_unreserve(obj); - } - amdgpu_bo_unref(); - - } - return 0; -} - -static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, - uint64_t *mcaddr) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - - WARN_ON_ONCE(obj->placement.num_placement > 1); - - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_pin(obj, obj->preferred_domains, mcaddr); - amdgpu_bo_unreserve(obj); - return r; -} - -static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_unpin(obj); - amdgpu_bo_unreserve(obj); - return r; -} - -static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, - void **map) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_kmap(obj, map); - amdgpu_bo_unreserve(obj); - return r; -} - -static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - amdgpu_bo_kunmap(obj); - amdgpu_bo_unreserve(obj); - return r; -} - static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset) { CGS_FUNC_ADEV; @@ -906,12 +781,6 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool ena } static const struct cgs_ops amdgpu_cgs_ops = { - .alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem, - .free_gpu_mem = amdgpu_cgs_free_gpu_mem, - .gmap_gpu_mem =
[PATCH 2/3] drm/amd/pp: Delete the wrap layer of smu_allocate/free_memory
use amdgpu_bo_create/free_kernel directly. Change-Id: I74f20353edd4e0df6328db66914cd9eabb60e1d7 Signed-off-by: Rex Zhu--- drivers/gpu/drm/amd/powerplay/inc/smumgr.h | 7 - drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c | 39 +++--- drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.h | 11 +- drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c | 53 +++ drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h | 11 +- drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c | 52 +++ drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h | 11 +- drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c | 51 --- .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 155 ++--- .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h | 11 +- 10 files changed, 176 insertions(+), 225 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index e1f6e83..8872c5c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -106,13 +106,6 @@ enum SMU_MAC_DEFINITION { extern int smum_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter); -extern int smu_allocate_memory(void *device, uint32_t size, -enum cgs_gpu_mem_type type, -uint32_t byte_align, uint64_t *mc_addr, -void **kptr, void *handle); - -extern int smu_free_memory(void *device, void *handle); - extern int smum_update_sclk_threshold(struct pp_hwmgr *hwmgr); extern int smum_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c index 7fe4c11..0d2892d 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c @@ -750,7 +750,6 @@ static int cz_start_smu(struct pp_hwmgr *hwmgr) static int cz_smu_init(struct pp_hwmgr *hwmgr) { - uint64_t mc_addr = 0; int ret = 0; struct cz_smumgr *cz_smu; @@ -768,31 +767,31 @@ static int cz_smu_init(struct pp_hwmgr *hwmgr) 
ALIGN(sizeof(struct SMU8_MultimediaPowerLogData), 32) + ALIGN(sizeof(struct SMU8_Fusion_ClkTable), 32); - ret = smu_allocate_memory(hwmgr->device, + ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, cz_smu->toc_buffer.data_size, - CGS_GPU_MEM_TYPE__GART_CACHEABLE, PAGE_SIZE, - _addr, - _smu->toc_buffer.kaddr, - _smu->toc_buffer.handle); + AMDGPU_GEM_DOMAIN_VRAM, + _smu->toc_buffer.handle, + _smu->toc_buffer.mc_addr, + _smu->toc_buffer.kaddr); if (ret != 0) return -1; - cz_smu->toc_buffer.mc_addr_high = smu_upper_32_bits(mc_addr); - cz_smu->toc_buffer.mc_addr_low = smu_lower_32_bits(mc_addr); + cz_smu->toc_buffer.mc_addr_high = smu_upper_32_bits(cz_smu->toc_buffer.mc_addr); + cz_smu->toc_buffer.mc_addr_low = smu_lower_32_bits(cz_smu->toc_buffer.mc_addr); - ret = smu_allocate_memory(hwmgr->device, + ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, cz_smu->smu_buffer.data_size, - CGS_GPU_MEM_TYPE__GART_CACHEABLE, PAGE_SIZE, - _addr, - _smu->smu_buffer.kaddr, - _smu->smu_buffer.handle); + AMDGPU_GEM_DOMAIN_VRAM, + _smu->smu_buffer.handle, + _smu->toc_buffer.mc_addr, + _smu->smu_buffer.kaddr); if (ret != 0) return -1; - cz_smu->smu_buffer.mc_addr_high = smu_upper_32_bits(mc_addr); - cz_smu->smu_buffer.mc_addr_low = smu_lower_32_bits(mc_addr); + cz_smu->smu_buffer.mc_addr_high = smu_upper_32_bits(cz_smu->toc_buffer.mc_addr); + cz_smu->smu_buffer.mc_addr_low = smu_lower_32_bits(cz_smu->toc_buffer.mc_addr); if (0 != cz_smu_populate_single_scratch_entry(hwmgr, CZ_SCRATCH_ENTRY_UCODE_ID_RLC_SCRATCH, @@ -845,10 +844,12 @@ static int cz_smu_fini(struct pp_hwmgr *hwmgr) cz_smu = (struct cz_smumgr *)hwmgr->smu_backend; if (cz_smu) { - cgs_free_gpu_mem(hwmgr->device, - cz_smu->toc_buffer.handle); - cgs_free_gpu_mem(hwmgr->device, - cz_smu->smu_buffer.handle); +
[PATCH 1/3] drm/amd/pp: Remove cgs wrap interface for temperature update
Change-Id: I742e82315910ce57aeb4a391fe2ac1cdfccdb8ea --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 13 - drivers/gpu/drm/amd/include/cgs_common.h | 6 -- drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | 4 +++- 3 files changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index f37482c..76f4758 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -905,18 +905,6 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool ena return 0; } -static int amdgpu_cgs_set_temperature_range(struct cgs_device *cgs_device, - int min_temperature, - int max_temperature) -{ - CGS_FUNC_ADEV; - - adev->pm.dpm.thermal.min_temp = min_temperature; - adev->pm.dpm.thermal.max_temp = max_temperature; - - return 0; -} - static const struct cgs_ops amdgpu_cgs_ops = { .alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem, .free_gpu_mem = amdgpu_cgs_free_gpu_mem, @@ -941,7 +929,6 @@ static int amdgpu_cgs_set_temperature_range(struct cgs_device *cgs_device, .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled, .enter_safe_mode = amdgpu_cgs_enter_safe_mode, .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, - .set_temperature_range = amdgpu_cgs_set_temperature_range, }; static const struct cgs_os_ops amdgpu_cgs_os_ops = { diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h index 113ba6f..7f26f20 100644 --- a/drivers/gpu/drm/amd/include/cgs_common.h +++ b/drivers/gpu/drm/amd/include/cgs_common.h @@ -354,9 +354,6 @@ typedef int(*cgs_get_active_displays_info)( typedef void (*cgs_lock_grbm_idx)(struct cgs_device *cgs_device, bool lock); -typedef int (*cgs_set_temperature_range)(struct cgs_device *cgs_device, - int min_temperature, - int max_temperature); struct cgs_ops { /* memory management calls (similar to KFD interface) */ cgs_alloc_gpu_mem_t alloc_gpu_mem; @@ -389,7 +386,6 @@ struct cgs_ops { 
cgs_is_virtualization_enabled_t is_virtualization_enabled; cgs_enter_safe_mode enter_safe_mode; cgs_lock_grbm_idx lock_grbm_idx; - cgs_set_temperature_range set_temperature_range; }; struct cgs_os_ops; /* To be define in OS-specific CGS header */ @@ -465,7 +461,5 @@ struct cgs_device #define cgs_lock_grbm_idx(cgs_device, lock) \ CGS_CALL(lock_grbm_idx, cgs_device, lock) -#define cgs_set_temperature_range(dev, min_temp, max_temp) \ - CGS_CALL(set_temperature_range, dev, min_temp, max_temp) #endif /* _CGS_COMMON_H */ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index f06f8f4..b784131 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -222,6 +222,7 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) { int ret = 0; struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX}; + struct amdgpu_device *adev = hwmgr->adev; if (hwmgr->hwmgr_func->get_thermal_temperature_range) hwmgr->hwmgr_func->get_thermal_temperature_range( @@ -232,7 +233,8 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) && hwmgr->hwmgr_func->start_thermal_controller != NULL) ret = hwmgr->hwmgr_func->start_thermal_controller(hwmgr, ); - cgs_set_temperature_range(hwmgr->device, range.min, range.max); + adev->pm.dpm.thermal.min_temp = range.min; + adev->pm.dpm.thermal.max_temp = range.max; return ret; } -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for VF"
You can tag my RB -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Deng, Emily Sent: 2018年3月5日 18:20 To: Deng, Emily; amd-gfx@lists.freedesktop.org Subject: RE: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for VF" Ping Hi all, Please help review this, the regression introduced by the commit 2ffe31deb27579e2f2c9444e01f4d8abf385d145 is gating the sriov sanity test. Best Wishes, Emily Deng > -Original Message- > From: Emily Deng [mailto:emily.d...@amd.com] > Sent: Friday, March 02, 2018 11:32 AM > To: amd-gfx@lists.freedesktop.org > Cc: Deng, Emily > Subject: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr > for VF" > > This reverts commit 2ffe31deb27579e2f2c9444e01f4d8abf385d145. > The sdma wptr poll memomy doesn't have the same efficiency as > doorbell, and it will make sdma hang when running tests. > > Change-Id: I6e334430b309b0c21aa18a08764320c7ff51e353 > Signed-off-by: Emily Deng > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 - > drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 27 --- > 2 files changed, 8 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 102dad3..5dcf98b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -186,7 +186,6 @@ struct amdgpu_ring { > uint64_teop_gpu_addr; > u32 doorbell_index; > booluse_doorbell; > - booluse_pollmem; > unsignedwptr_offs; > unsignedfence_offs; > uint64_tcurrent_ctx; > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > index 521978c..d3fb3ca 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > @@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct > amdgpu_ring *ring) > struct amdgpu_device *adev = ring->adev; > u32 wptr; > > - if (ring->use_doorbell || ring->use_pollmem) { > + if 
(ring->use_doorbell) { > /* XXX check if swapping is necessary on BE */ > wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2; > } else { > @@ -380,13 +380,10 @@ static void sdma_v3_0_ring_set_wptr(struct > amdgpu_ring *ring) > > if (ring->use_doorbell) { > u32 *wb = (u32 *)>wb.wb[ring->wptr_offs]; > + > /* XXX check if swapping is necessary on BE */ > WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); > WDOORBELL32(ring->doorbell_index, lower_32_bits(ring- > >wptr) << 2); > - } else if (ring->use_pollmem) { > - u32 *wb = (u32 *)>wb.wb[ring->wptr_offs]; > - > - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); > } else { > int me = (ring == >adev->sdma.instance[0].ring) ? 0 : 1; > > @@ -719,14 +716,10 @@ static int sdma_v3_0_gfx_resume(struct > amdgpu_device *adev) > WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i], > upper_32_bits(wptr_gpu_addr)); > wptr_poll_cntl = > RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); > - if (ring->use_pollmem) > - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > - > SDMA0_GFX_RB_WPTR_POLL_CNTL, > -ENABLE, 1); > + if (amdgpu_sriov_vf(adev)) > + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); > else > - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > - > SDMA0_GFX_RB_WPTR_POLL_CNTL, > -ENABLE, 0); > + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); > WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], > wptr_poll_cntl); > > /* enable DMA RB */ > @@ -1208,13 +1201,9 @@ static int sdma_v3_0_sw_init(void *handle) > for (i = 0; i < adev->sdma.num_instances; i++) { > ring = >sdma.instance[i].ring; > ring->ring_obj = NULL; > - if (!amdgpu_sriov_vf(adev)) { > - ring->use_doorbell = true; > - ring->doorbell_index = (i == 0) ? > - AMDGPU_DOORBELL_sDMA_ENGINE0 : > AMDGPU_DOORBELL_sDMA_ENGINE1; > - } else { > - ring->use_pollmem = true; > - } > + ring->use_doorbell = true; > + ring->doorbell_index = (i == 0) ? 
> + AMDGPU_DOORBELL_sDMA_ENGINE0 : >
Re: [PATCH 1/1] amdgpu: add scatter gather display flag
On 2018-03-03 12:27 AM, Samuel Li wrote: > --- > include/drm/amdgpu_drm.h | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h > index d9aa4a3..dc4d8bd 100644 > --- a/include/drm/amdgpu_drm.h > +++ b/include/drm/amdgpu_drm.h > @@ -526,6 +526,7 @@ struct drm_amdgpu_cs_chunk_data { > */ > #define AMDGPU_IDS_FLAGS_FUSION 0x1 > #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 > +#define AMDGPU_IDS_FLAGS_SGDISPLAY 0x4 > > /* indicate if acceleration can be working */ > #define AMDGPU_INFO_ACCEL_WORKING0x00 > See include/drm/README on how to synchronize these headers with the kernel. P.S. Please put this into .git/config in your libdrm tree: [format] subjectprefix = "PATCH libdrm" -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr for VF"
Ping Hi all, Please help review this, the regression introduced by the commit 2ffe31deb27579e2f2c9444e01f4d8abf385d145 is gating the sriov sanity test. Best Wishes, Emily Deng > -Original Message- > From: Emily Deng [mailto:emily.d...@amd.com] > Sent: Friday, March 02, 2018 11:32 AM > To: amd-gfx@lists.freedesktop.org > Cc: Deng, Emily> Subject: [PATCH] Revert "drm/amdgpu: use polling mem to set SDMA3 wptr > for VF" > > This reverts commit 2ffe31deb27579e2f2c9444e01f4d8abf385d145. > The sdma wptr poll memomy doesn't have the same efficiency as doorbell, > and it will make sdma hang when running tests. > > Change-Id: I6e334430b309b0c21aa18a08764320c7ff51e353 > Signed-off-by: Emily Deng > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 - > drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 27 --- > 2 files changed, 8 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 102dad3..5dcf98b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -186,7 +186,6 @@ struct amdgpu_ring { > uint64_teop_gpu_addr; > u32 doorbell_index; > booluse_doorbell; > - booluse_pollmem; > unsignedwptr_offs; > unsignedfence_offs; > uint64_tcurrent_ctx; > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > index 521978c..d3fb3ca 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > @@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct > amdgpu_ring *ring) > struct amdgpu_device *adev = ring->adev; > u32 wptr; > > - if (ring->use_doorbell || ring->use_pollmem) { > + if (ring->use_doorbell) { > /* XXX check if swapping is necessary on BE */ > wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2; > } else { > @@ -380,13 +380,10 @@ static void sdma_v3_0_ring_set_wptr(struct > amdgpu_ring *ring) > > if (ring->use_doorbell) { > u32 *wb = (u32 
*)>wb.wb[ring->wptr_offs]; > + > /* XXX check if swapping is necessary on BE */ > WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); > WDOORBELL32(ring->doorbell_index, lower_32_bits(ring- > >wptr) << 2); > - } else if (ring->use_pollmem) { > - u32 *wb = (u32 *)>wb.wb[ring->wptr_offs]; > - > - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); > } else { > int me = (ring == >adev->sdma.instance[0].ring) ? 0 : 1; > > @@ -719,14 +716,10 @@ static int sdma_v3_0_gfx_resume(struct > amdgpu_device *adev) > WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + > sdma_offsets[i], > upper_32_bits(wptr_gpu_addr)); > wptr_poll_cntl = > RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); > - if (ring->use_pollmem) > - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > - > SDMA0_GFX_RB_WPTR_POLL_CNTL, > -ENABLE, 1); > + if (amdgpu_sriov_vf(adev)) > + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); > else > - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > - > SDMA0_GFX_RB_WPTR_POLL_CNTL, > -ENABLE, 0); > + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > +SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); > WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + > sdma_offsets[i], wptr_poll_cntl); > > /* enable DMA RB */ > @@ -1208,13 +1201,9 @@ static int sdma_v3_0_sw_init(void *handle) > for (i = 0; i < adev->sdma.num_instances; i++) { > ring = >sdma.instance[i].ring; > ring->ring_obj = NULL; > - if (!amdgpu_sriov_vf(adev)) { > - ring->use_doorbell = true; > - ring->doorbell_index = (i == 0) ? > - AMDGPU_DOORBELL_sDMA_ENGINE0 : > AMDGPU_DOORBELL_sDMA_ENGINE1; > - } else { > - ring->use_pollmem = true; > - } > + ring->use_doorbell = true; > + ring->doorbell_index = (i == 0) ? > + AMDGPU_DOORBELL_sDMA_ENGINE0 : > AMDGPU_DOORBELL_sDMA_ENGINE1; > > sprintf(ring->name, "sdma%d", i); > r = amdgpu_ring_init(adev, ring, 1024, > -- > 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amdgpu: Enable scatter gather display support
On 2018-03-03 12:25 AM, Samuel Li wrote: > It's enabled by default. -1 is auto, to allow both vram and gtt > memory be available, for testing purpose only. Do we really need a module parameter for this? There's already too many of them. The driver should know which to use in which cases, and developers can easily tweak the code. Users should never need to change this. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re:
On 2018-03-03 05:49 AM, Keith Packard wrote: > Here are the patches to the modesetting driver amended for the amdgpu > driver. Thanks for the patches. Unfortunately, since this driver still has to compile and work with xserver >= 1.13, at least patches 1 & 3 cannot be applied as is. I was going to port these and take care of that anyway, though I might not get around to it before April. If it can't wait that long, I can give you details about what needs to be done. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [ANNOUNCE] xf86-video-amdgpu 18.0.0
On 2018-03-04 08:25 AM, Mario Kleiner wrote: > Hi Michel, > > wrt. our conversation from ~ 1 month ago: > > On 01/22/2018 07:01 PM, Michel Dänzer wrote: >> On 2018-01-22 03:14 AM, Mario Kleiner wrote: >> >>> When testing against current master in dual-x-screen mode, >>> i found out that the current ati-ddx commit at the top 1fe8ca75974c52 >>> "Keep track of how many SW cursors are visible on each screen" >>> breaks multi x-screen setups by screwing up cursor handling. >> >> I stumbled over this myself the other day. There's an issue with the >> (un)wrapping of miPointerSpriteFuncRec entries. Not sure yet what >> exactly's wrong with it though. I'll look into it. >> > > Has the multi-x-screen breakage been fixed for amdgpu 18.0.0? I can't > see a commit hinting at a fix, and i assume it was broken by the new HW > cursor + pageflip logic, just as in the xf86-video-ati case? I can't > test anything myself atm. as my only amdgpu capable gpu has left me :( > > Broken multi-x-screen from an official driver release ending in a distro > would be a big problem for many vision scientists, as they depend on AMD > + dual-x-screen support for their work. Crap, I forgot about this issue. :( I'll try fixing it ASAP and making a point release by the end of next week. Apologies for any inconvenience. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 4/4] drm/amd/pp: Add auto power profilng switch based on workloads
Reviewed-by: Evan Quan-Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Rex Zhu Sent: Friday, March 02, 2018 8:44 PM To: amd-gfx@lists.freedesktop.org Cc: Zhu, Rex Subject: [PATCH 4/4] drm/amd/pp: Add auto power profilng switch based on workloads Add power profiling mode dynamic switch based on the workloads. Currently, support Cumpute, VR, Video, 3D,power saving with Cumpute have highest prority, power saving have lowest prority. in manual dpm mode, driver will stop auto switch, just save the client's requests. user can set power profiling mode through sysfs. when exit manual dpm mode, driver will response the client's requests. switch based on the client's prority. Change-Id: Ia97b679f4f11a7c3cabd350462101897909c8115 Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h| 4 +-- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 18 +--- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 32 ++ drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c| 16 +++ drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c | 11 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 6 ++-- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 8 +- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h | 3 -- drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 5 9 files changed, 66 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 9c373f8f..643d008 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -341,9 +341,9 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ (adev)->powerplay.pp_handle, request)) -#define amdgpu_dpm_switch_power_profile(adev, type) \ +#define amdgpu_dpm_switch_power_profile(adev, type, en) \ ((adev)->powerplay.pp_funcs->switch_power_profile(\ - (adev)->powerplay.pp_handle, type)) + (adev)->powerplay.pp_handle, type, en)) #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \ 
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 7dfba2d..ec54584 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -83,20 +83,6 @@ enum amd_vce_level { AMD_VCE_LEVEL_DC_GP_HIGH = 5, /* DC, general purpose queue, 1080 >= res > 720 */ }; -enum amd_pp_profile_type { - AMD_PP_GFX_PROFILE, - AMD_PP_COMPUTE_PROFILE, -}; - -struct amd_pp_profile { - enum amd_pp_profile_type type; - uint32_t min_sclk; - uint32_t min_mclk; - uint16_t activity_threshold; - uint8_t up_hyst; - uint8_t down_hyst; -}; - enum amd_fan_ctrl_mode { AMD_FAN_CTRL_NONE = 0, AMD_FAN_CTRL_MANUAL = 1, @@ -151,7 +137,6 @@ enum PP_SMC_POWER_PROFILE { PP_SMC_POWER_PROFILE_VR = 0x3, PP_SMC_POWER_PROFILE_COMPUTE = 0x4, PP_SMC_POWER_PROFILE_CUSTOM = 0x5, - PP_SMC_POWER_PROFILE_AUTO = 0x6, }; enum { @@ -260,8 +245,7 @@ struct amd_pm_funcs { int (*get_pp_table)(void *handle, char **table); int (*set_pp_table)(void *handle, const char *buf, size_t size); void (*debugfs_print_current_performance_level)(void *handle, struct seq_file *m); - int (*switch_power_profile)(void *handle, - enum amd_pp_profile_type type); + int (*switch_power_profile)(void *handle, enum PP_SMC_POWER_PROFILE +type, bool en); /* export to amdgpu */ void (*powergate_uvd)(void *handle, bool gate); void (*powergate_vce)(void *handle, bool gate); diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 4876871..3bd2fad 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -1073,22 +1073,44 @@ static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint3 } static int pp_dpm_switch_power_profile(void *handle, - enum amd_pp_profile_type type) + enum PP_SMC_POWER_PROFILE type, bool en) { struct pp_hwmgr *hwmgr; - struct amd_pp_profile 
request = {0}; struct pp_instance *pp_handle = (struct pp_instance *)handle; + long *workload; + uint32_t index; if (pp_check(pp_handle)) return -EINVAL; hwmgr = pp_handle->hwmgr; - if (hwmgr->current_power_profile != type) { - request.type = type; - pp_dpm_set_power_profile_state(handle, ); + if
RE: [PATCH 3/4] drm/amd/pp: Revert gfx/compute profile switch sysfs
Reviewed-by: Evan Quan-Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Rex Zhu Sent: Friday, March 02, 2018 8:44 PM To: amd-gfx@lists.freedesktop.org Cc: Zhu, Rex Subject: [PATCH 3/4] drm/amd/pp: Revert gfx/compute profile switch sysfs The gfx/compute profiling mode switch is only for internally test. Not a complete solution and unexpectly upstream. so revert it. Change-Id: I1af1b64a63b6fc12c24cf73df03b083b3661ca02 Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h| 8 - drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 180 --- drivers/gpu/drm/amd/amdgpu/ci_dpm.c| 256 - drivers/gpu/drm/amd/amdgpu/ci_dpm.h| 7 - drivers/gpu/drm/amd/include/kgd_pp_interface.h | 7 - drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 114 - .../gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | 17 -- drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c | 2 - drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 77 --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 86 --- .../gpu/drm/amd/powerplay/inc/hardwaremanager.h| 1 - drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 9 - drivers/gpu/drm/amd/powerplay/inc/smumgr.h | 3 - drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 27 --- drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c | 66 -- .../drm/amd/powerplay/smumgr/polaris10_smumgr.c| 64 -- drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c | 10 - .../gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c| 64 -- 18 files changed, 998 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index bd745a4..9c373f8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -341,14 +341,6 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ (adev)->powerplay.pp_handle, request)) -#define amdgpu_dpm_get_power_profile_state(adev, query) \ - ((adev)->powerplay.pp_funcs->get_power_profile_state(\ - (adev)->powerplay.pp_handle, query)) - -#define 
amdgpu_dpm_set_power_profile_state(adev, request) \ - ((adev)->powerplay.pp_funcs->set_power_profile_state(\ - (adev)->powerplay.pp_handle, request)) - #define amdgpu_dpm_switch_power_profile(adev, type) \ ((adev)->powerplay.pp_funcs->switch_power_profile(\ (adev)->powerplay.pp_handle, type)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 9e73cbc..632b186 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -734,161 +734,6 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, return -EINVAL; } -static ssize_t amdgpu_get_pp_power_profile(struct device *dev, - char *buf, struct amd_pp_profile *query) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = ddev->dev_private; - int ret = 0xff; - - if (adev->powerplay.pp_funcs->get_power_profile_state) - ret = amdgpu_dpm_get_power_profile_state( - adev, query); - - if (ret) - return ret; - - return snprintf(buf, PAGE_SIZE, - "%d %d %d %d %d\n", - query->min_sclk / 100, - query->min_mclk / 100, - query->activity_threshold, - query->up_hyst, - query->down_hyst); -} - -static ssize_t amdgpu_get_pp_gfx_power_profile(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct amd_pp_profile query = {0}; - - query.type = AMD_PP_GFX_PROFILE; - - return amdgpu_get_pp_power_profile(dev, buf, ); -} - -static ssize_t amdgpu_get_pp_compute_power_profile(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct amd_pp_profile query = {0}; - - query.type = AMD_PP_COMPUTE_PROFILE; - - return amdgpu_get_pp_power_profile(dev, buf, ); -} - -static ssize_t amdgpu_set_pp_power_profile(struct device *dev, - const char *buf, - size_t count, - struct amd_pp_profile *request) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = ddev->dev_private; - uint32_t loop = 0; - char *sub_str, buf_cpy[128], *tmp_str; - const char delimiter[3] = 
{' ', '\n', '\0'}; - long int value; - int ret = 0xff; - - if (strncmp("reset", buf, strlen("reset")) == 0) { - if
RE: [PATCH 2/4] drm/amd/pp: Fix sclk in highest two levels in compute mode on smu7
Reviewed-by: Evan Quan-Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Rex Zhu Sent: Friday, March 02, 2018 8:44 PM To: amd-gfx@lists.freedesktop.org Cc: Zhu, Rex Subject: [PATCH 2/4] drm/amd/pp: Fix sclk in highest two levels in compute mode on smu7 Compute workload tends to be "bursty", Only tune the behavior of nature dpm don't work well for most of such workloads. From tests result, Fix sclk in highest two levels can get better performance. so add min sclk setting into the default cumpute workload policy on smu7. user still can change sclk range through sysfs pp_dpm_sclk for better perf/watt. Change-Id: I0ffd19a5d3c9e57f22aebd9ff1e18b980a111384 Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 21 + 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index c1e32f4..ba114482 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -5005,6 +5005,26 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) return size; } +static void smu7_patch_compute_profile_mode(struct pp_hwmgr *hwmgr, + enum PP_SMC_POWER_PROFILE requst) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t tmp, level; + + if (requst == PP_SMC_POWER_PROFILE_COMPUTE) { + if (data->dpm_level_enable_mask.sclk_dpm_enable_mask) { + level = 0; + tmp = data->dpm_level_enable_mask.sclk_dpm_enable_mask; + while (tmp >>= 1) + level++; + if (level > 0) + smu7_force_clock_level(hwmgr, PP_SCLK, 3 << (level-1)); + } + } else if (hwmgr->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) { + smu7_force_clock_level(hwmgr, PP_SCLK, data->dpm_level_enable_mask.sclk_dpm_enable_mask); + } +} + static int smu7_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint32_t size) { struct smu7_hwmgr *data = (struct smu7_hwmgr 
*)(hwmgr->backend); @@ -5055,6 +5075,7 @@ static int smu7_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint data->current_profile_setting.mclk_down_hyst = tmp.mclk_down_hyst; data->current_profile_setting.mclk_activity = tmp.mclk_activity; } + smu7_patch_compute_profile_mode(hwmgr, mode); hwmgr->power_profile_mode = mode; } break; -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/3] drm/amd/pp: Implement update_dpm_settings on CI
use SW method to update DPM settings by updating SRAM directly on CI. Change-Id: Ie9ed6c3a0e1c327cc9a9b06bec47b1cede87278d Signed-off-by: Rex Zhu--- drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 97 1 file changed, 97 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 76f700f..179d00c 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2819,6 +2819,102 @@ static int ci_start_smu(struct pp_hwmgr *hwmgr) return 0; } +static int ci_update_dpm_settings(struct pp_hwmgr *hwmgr, + void *profile_setting) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct ci_smumgr *smu_data = (struct ci_smumgr *) + (hwmgr->smu_backend); + struct profile_mode_setting *setting; + struct SMU7_Discrete_GraphicsLevel *levels = + smu_data->smc_state_table.GraphicsLevel; + uint32_t array = smu_data->dpm_table_start + + offsetof(SMU7_Discrete_DpmTable, GraphicsLevel); + + uint32_t mclk_array = smu_data->dpm_table_start + + offsetof(SMU7_Discrete_DpmTable, MemoryLevel); + struct SMU7_Discrete_MemoryLevel *mclk_levels = + smu_data->smc_state_table.MemoryLevel; + uint32_t i; + uint32_t offset, up_hyst_offset, down_hyst_offset, clk_activity_offset, tmp; + + if (profile_setting == NULL) + return -EINVAL; + + setting = (struct profile_mode_setting *)profile_setting; + + if (setting->bupdate_sclk) { + if (!data->sclk_dpm_key_disabled) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_FreezeLevel); + for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) { + if (levels[i].ActivityLevel != + cpu_to_be16(setting->sclk_activity)) { + levels[i].ActivityLevel = cpu_to_be16(setting->sclk_activity); + + clk_activity_offset = array + (sizeof(SMU7_Discrete_GraphicsLevel) * i) + + offsetof(SMU7_Discrete_GraphicsLevel, ActivityLevel); + offset = clk_activity_offset & ~0x3; + tmp = 
PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset)); + tmp = phm_set_field_to_u32(clk_activity_offset, tmp, levels[i].ActivityLevel, sizeof(uint16_t)); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp)); + + } + if (levels[i].UpH != setting->sclk_up_hyst || + levels[i].DownH != setting->sclk_down_hyst) { + levels[i].UpH = setting->sclk_up_hyst; + levels[i].DownH = setting->sclk_down_hyst; + up_hyst_offset = array + (sizeof(SMU7_Discrete_GraphicsLevel) * i) + + offsetof(SMU7_Discrete_GraphicsLevel, UpH); + down_hyst_offset = array + (sizeof(SMU7_Discrete_GraphicsLevel) * i) + + offsetof(SMU7_Discrete_GraphicsLevel, DownH); + offset = up_hyst_offset & ~0x3; + tmp = PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset)); + tmp = phm_set_field_to_u32(up_hyst_offset, tmp, levels[i].UpH, sizeof(uint8_t)); + tmp = phm_set_field_to_u32(down_hyst_offset, tmp, levels[i].DownH, sizeof(uint8_t)); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp)); + } + } + if (!data->sclk_dpm_key_disabled) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_UnfreezeLevel); + } + + if (setting->bupdate_mclk) { + if (!data->mclk_dpm_key_disabled) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_FreezeLevel); + for (i = 0; i < smu_data->smc_state_table.MemoryDpmLevelCount; i++) { + if (mclk_levels[i].ActivityLevel != + cpu_to_be16(setting->mclk_activity)) { + mclk_levels[i].ActivityLevel = cpu_to_be16(setting->mclk_activity); + + clk_activity_offset = mclk_array + (sizeof(SMU7_Discrete_MemoryLevel) * i) + + offsetof(SMU7_Discrete_MemoryLevel,
[PATCH 2/3] drm/amd/pp: Implement update_dpm_settings on Tonga
use SW method to update DPM settings by updating SRAM directly on Tonga. Change-Id: I21b1f5caee85587e30ff4d37b040d3ba36b843ee Signed-off-by: Rex Zhu--- .../gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c| 97 ++ 1 file changed, 97 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index a268b98..86fb770 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -3277,6 +3277,102 @@ static int tonga_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, array_size, SMC_RAM_END); } +static int tonga_update_dpm_settings(struct pp_hwmgr *hwmgr, + void *profile_setting) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct tonga_smumgr *smu_data = (struct tonga_smumgr *) + (hwmgr->smu_backend); + struct profile_mode_setting *setting; + struct SMU72_Discrete_GraphicsLevel *levels = + smu_data->smc_state_table.GraphicsLevel; + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU72_Discrete_DpmTable, GraphicsLevel); + + uint32_t mclk_array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU72_Discrete_DpmTable, MemoryLevel); + struct SMU72_Discrete_MemoryLevel *mclk_levels = + smu_data->smc_state_table.MemoryLevel; + uint32_t i; + uint32_t offset, up_hyst_offset, down_hyst_offset, clk_activity_offset, tmp; + + if (profile_setting == NULL) + return -EINVAL; + + setting = (struct profile_mode_setting *)profile_setting; + + if (setting->bupdate_sclk) { + if (!data->sclk_dpm_key_disabled) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_FreezeLevel); + for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) { + if (levels[i].ActivityLevel != + cpu_to_be16(setting->sclk_activity)) { + levels[i].ActivityLevel = cpu_to_be16(setting->sclk_activity); + + clk_activity_offset = array + (sizeof(SMU72_Discrete_GraphicsLevel) * i) + + offsetof(SMU72_Discrete_GraphicsLevel, 
ActivityLevel); + offset = clk_activity_offset & ~0x3; + tmp = PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset)); + tmp = phm_set_field_to_u32(clk_activity_offset, tmp, levels[i].ActivityLevel, sizeof(uint16_t)); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp)); + + } + if (levels[i].UpHyst != setting->sclk_up_hyst || + levels[i].DownHyst != setting->sclk_down_hyst) { + levels[i].UpHyst = setting->sclk_up_hyst; + levels[i].DownHyst = setting->sclk_down_hyst; + up_hyst_offset = array + (sizeof(SMU72_Discrete_GraphicsLevel) * i) + + offsetof(SMU72_Discrete_GraphicsLevel, UpHyst); + down_hyst_offset = array + (sizeof(SMU72_Discrete_GraphicsLevel) * i) + + offsetof(SMU72_Discrete_GraphicsLevel, DownHyst); + offset = up_hyst_offset & ~0x3; + tmp = PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset)); + tmp = phm_set_field_to_u32(up_hyst_offset, tmp, levels[i].UpHyst, sizeof(uint8_t)); + tmp = phm_set_field_to_u32(down_hyst_offset, tmp, levels[i].DownHyst, sizeof(uint8_t)); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp)); + } + } + if (!data->sclk_dpm_key_disabled) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_UnfreezeLevel); + } + + if (setting->bupdate_mclk) { + if (!data->mclk_dpm_key_disabled) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_FreezeLevel); + for (i = 0; i < smu_data->smc_state_table.MemoryDpmLevelCount; i++) { + if (mclk_levels[i].ActivityLevel != + cpu_to_be16(setting->mclk_activity)) { + mclk_levels[i].ActivityLevel = cpu_to_be16(setting->mclk_activity); + + clk_activity_offset =
[PATCH 1/3] drm/amd/pp: Implement update_dpm_settings on Fiji
use SW method to update DPM settings by updating SRAM directly on Fiji. Change-Id: Ic485e526332a0f1a169d395dfe810d71e5bdfea8 Signed-off-by: Rex Zhu--- drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c | 97 ++ 1 file changed, 97 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index 047b2df..fc556f0 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -2719,6 +2719,102 @@ static int fiji_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, array_size, SMC_RAM_END); } +static int fiji_update_dpm_settings(struct pp_hwmgr *hwmgr, + void *profile_setting) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct fiji_smumgr *smu_data = (struct fiji_smumgr *) + (hwmgr->smu_backend); + struct profile_mode_setting *setting; + struct SMU73_Discrete_GraphicsLevel *levels = + smu_data->smc_state_table.GraphicsLevel; + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU73_Discrete_DpmTable, GraphicsLevel); + + uint32_t mclk_array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU73_Discrete_DpmTable, MemoryLevel); + struct SMU73_Discrete_MemoryLevel *mclk_levels = + smu_data->smc_state_table.MemoryLevel; + uint32_t i; + uint32_t offset, up_hyst_offset, down_hyst_offset, clk_activity_offset, tmp; + + if (profile_setting == NULL) + return -EINVAL; + + setting = (struct profile_mode_setting *)profile_setting; + + if (setting->bupdate_sclk) { + if (!data->sclk_dpm_key_disabled) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_FreezeLevel); + for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) { + if (levels[i].ActivityLevel != + cpu_to_be16(setting->sclk_activity)) { + levels[i].ActivityLevel = cpu_to_be16(setting->sclk_activity); + + clk_activity_offset = array + (sizeof(SMU73_Discrete_GraphicsLevel) * i) + + offsetof(SMU73_Discrete_GraphicsLevel, ActivityLevel); + 
offset = clk_activity_offset & ~0x3; + tmp = PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset)); + tmp = phm_set_field_to_u32(clk_activity_offset, tmp, levels[i].ActivityLevel, sizeof(uint16_t)); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp)); + + } + if (levels[i].UpHyst != setting->sclk_up_hyst || + levels[i].DownHyst != setting->sclk_down_hyst) { + levels[i].UpHyst = setting->sclk_up_hyst; + levels[i].DownHyst = setting->sclk_down_hyst; + up_hyst_offset = array + (sizeof(SMU73_Discrete_GraphicsLevel) * i) + + offsetof(SMU73_Discrete_GraphicsLevel, UpHyst); + down_hyst_offset = array + (sizeof(SMU73_Discrete_GraphicsLevel) * i) + + offsetof(SMU73_Discrete_GraphicsLevel, DownHyst); + offset = up_hyst_offset & ~0x3; + tmp = PP_HOST_TO_SMC_UL(cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset)); + tmp = phm_set_field_to_u32(up_hyst_offset, tmp, levels[i].UpHyst, sizeof(uint8_t)); + tmp = phm_set_field_to_u32(down_hyst_offset, tmp, levels[i].DownHyst, sizeof(uint8_t)); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset, PP_HOST_TO_SMC_UL(tmp)); + } + } + if (!data->sclk_dpm_key_disabled) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_UnfreezeLevel); + } + + if (setting->bupdate_mclk) { + if (!data->mclk_dpm_key_disabled) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_FreezeLevel); + for (i = 0; i < smu_data->smc_state_table.MemoryDpmLevelCount; i++) { + if (mclk_levels[i].ActivityLevel != + cpu_to_be16(setting->mclk_activity)) { + mclk_levels[i].ActivityLevel = cpu_to_be16(setting->mclk_activity); + + clk_activity_offset = mclk_array
RE: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2)
To better approach this issue I suggest to do the following: 1. Revert the original patch. 2. Stop waiting to long for writes. E.g. use a separate timeout (20ms maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout. Cannot do that 20ms is not enough, sometimes you need 10 seconds since other VFs may doing bad things like occupying GPU intentionally or they are doing TDR, so I don't think separate read and write is good idea, they should be treated equally 3. To the read function add a "if (!in_intterupt()) may_sleep();" and then retest. That should at least print a nice warning when called from atomic context. Sorry what is may_sleep() ?? 4. Test the whole thing and try to fix all warnings about atomic contexts from the may_sleep(); 5. Reapply the original patch, but this time only for the read function, not the write function. From current LKG code, the only one spin lock may wrapping the kiq_rreg/wreg() is the pcie_idx_lock, and this lock is only used during init(), Since init() is run under the case of exclusive mode for SRIOV, which means: 1) register access is not go through KIQ (see admgpu_mm_reg) 2) those functions are only in bif_medium_grain_xxx part (vi.c and nbio_v6.c) , and they won't hit under SRIOV ( we return in the head if SRIOV detect) So I don' think this spin_lock may cause trouble... /Monk -Original Message- From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] Sent: 2018年3月5日 15:57 To: Liu, Monk; Koenig, Christian ; Kuehling, Felix ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in IRQ(v2) > otherwise I don't see how it is better by reverting it Well it's better to revert it for now because it seems to create more problems than it solves. To better approach this issue I suggest to do the following: 1. Revert the original patch. 2. Stop waiting to long for writes. E.g. use a separate timeout (20ms maybe?) to wait for the write. Then do a WARN_ON_ONCE when we timeout. 3. 
To the read function add a "if (!in_intterupt()) may_sleep();" and then retest. That should at least print a nice warning when called from atomic context. 4. Test the whole thing and try to fix all warnings about atomic contexts from the may_sleep(); 5. Reapply the original patch, but this time only for the read function, not the write function. Regards, Christian. Am 05.03.2018 um 05:20 schrieb Liu, Monk: > When there are 16 VF/VM on one GPU, we can easily hit "sys lockup to > 22s" kernel error/warning introduced by kiq_rreg/wreg routine That's > why I must use this patch to let thread sleep a while and try again, > > If you insist reverting this patch please give me a solution, > otherwise I don't see how it is better by reverting it > > /Monk > > -Original Message- > From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] > Sent: 2018年3月3日 21:38 > To: Kuehling, Felix ; Liu, Monk > ; amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in > IRQ(v2) > > Am 02.03.2018 um 21:47 schrieb Felix Kuehling: >> On 2018-03-02 04:29 AM, Liu, Monk wrote: >>> In_atomic() isnot encouraged to be used to judge if sleep is >>> possible, see the macros of it >>> >>> #define in_atomic() (preept_count() != 0) >> OK. But my point is still that you're not testing the right thing >> when you check in_interrupt(). The comment before the in_atomic macro >> definition states the limitations and says "do not use in driver code". >> Unfortunately it doesn't suggest any alternative. I think >> in_interrupt is actually worse, because it misses even more cases than >> in_atomic. > Thinking about this, Felix seems to be absolutely right. > > So we need to revert this patch since you can't reliable detect in a driver > if sleeping is allowed or not. > > Regards, > Christian. 
> >> Regards, >> Felix >> >>> /Monk >>> >>> -Original Message- >>> From: Kuehling, Felix >>> Sent: 2018年3月1日 23:50 >>> To: amd-gfx@lists.freedesktop.org; Liu, Monk >>> Subject: Re: [PATCH] drm/amdgpu: try again kiq access if not in >>> IRQ(v2) >>> >>> On 2018-02-28 02:27 AM, Monk Liu wrote: sometimes GPU is switched to other VFs and won't swich back soon, so the kiq reg access will not signal within a short period, instead of busy waiting a long time(MAX_KEQ_REG_WAIT) and returning TMO we can istead sleep 5ms and try again later (non irq context) And since the waiting in kiq_r/weg is busy wait, so MAX_KIQ_REG_WAIT shouldn't set to a long time, set it to 10ms is more appropriate. if gpu already in reset state, don't retry the KIQ reg access otherwise it would always hang because KIQ was already die usually. v2: replace schedule() with msleep() for the wait Change-Id:
RE: [PATCH 1/4] drm/amd/pp: Implement get/set_power_profile_mode on smu7
Comment inline. Regards Evan -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Rex Zhu Sent: Friday, March 02, 2018 8:44 PM To: amd-gfx@lists.freedesktop.org Cc: Zhu, RexSubject: [PATCH 1/4] drm/amd/pp: Implement get/set_power_profile_mode on smu7 It show what parameters can be configured to tune the behavior of natural dpm for perf/watt on smu7. user can select the mode per workload, but even the default per workload settings are not bulletproof. user can configure custom settings per different use case for better perf or better perf/watt. cat pp_power_profile_mode NUMMODE_NAME SCLK_UP_HYST SCLK_DOWN_HYST SCLK_ACTIVE_LEVEL MCLK_UP_HYST MCLK_DOWN_HYST MCLK_ACTIVE_LEVEL 0 3D_FULL_SCREEN:0 100 30 0 100 10 1 POWER_SAVING: 100 30 --- 2VIDEO:--- 10 16 31 3 VR:0 11 50 0 100 10 4 COMPUTE:05 30 --- 5 CUSTOM:000 000 * CURRENT:0 100 30 0 100 10 Under manual dpm level, user can echo "0/1/2/3/4">pp_power_profile_mode to select 3D_FULL_SCREEN/POWER_SAVING/VIDEO/VR/COMPUTE mode. echo "5 * * * * * * * *">pp_power_profile_mode to set custom settings. "5 * * * * * * * *" mean "CUSTOM enable_sclk SCLK_UP_HYST SCLK_DOWN_HYST SCLK_ACTIVE_LEVEL enable_mclk MCLK_UP_HYST MCLK_DOWN_HYST MCLK_ACTIVE_LEVEL" if the parameter enable_sclk/enable_mclk is true, driver will update the following parameters to dpm table. if false, ignore the following parameters. 
Reviewed-by: Alex Deucher Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 143 +++ 1 file changed, 143 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 731475b..c1e32f4 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -81,6 +81,13 @@ #define PCIE_BUS_CLK1 #define TCLK(PCIE_BUS_CLK / 10) +static const struct profile_mode_setting smu7_profiling[5] = + {{1, 0, 100, 30, 1, 0, 100, 10}, +{1, 10, 0, 30, 0, 0, 0, 0}, +{0, 0, 0, 0, 1, 10, 16, 31}, +{1, 0, 11, 50, 1, 0, 100, 10}, +{1, 0, 5, 30, 0, 0, 0, 0}, + }; /** Values for the CG_THERMAL_CTRL::DPM_EVENT_SRC field. */ enum DPM_EVENT_SRC { @@ -2475,6 +2482,9 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr) smu7_patch_voltage_workaround(hwmgr); smu7_init_dpm_defaults(hwmgr); + hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; + hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; + /* Get leakage voltage based on leakage ID. */ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_EVV)) { @@ -4923,6 +4933,137 @@ static int smu7_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, return 0; } +static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char +*buf) { + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t i, size = 0; + uint32_t len; + + static const char *profile_name[6] = {"3D_FULL_SCREEN", + "POWER_SAVING", + "VIDEO", + "VR", + "COMPUTE", + "CUSTOM"}; + + static const char *title[8] = {"NUM", + "MODE_NAME", + "SCLK_UP_HYST", + "SCLK_DOWN_HYST", + "SCLK_ACTIVE_LEVEL", + "MCLK_UP_HYST", + "MCLK_DOWN_HYST", + "MCLK_ACTIVE_LEVEL"}; + + if (!buf) + return -EINVAL; + + size += sprintf(buf + size, "%s %16s %16s %16s %16s %16s %16s %16s\n", + title[0], title[1], title[2], title[3], + title[4], title[5], title[6], title[7]); + + len =