[PATCH] drm/amdkfd: Use gpu_offset for user queue's wptr
Directly use tbo's start address will miss the domain start offset. Need to use gpu_offset instead. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 77159b03a422..36e7171ad9a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -216,7 +216,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, if (q->wptr_bo) { wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); - queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off; + queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off; } queue_input.is_kfd_process = 1; -- 2.34.1
[PATCH] drm/amdgpu: Forbid kfd using cpu to update pt if vm is shared with gfx
If a same GPU VM is shared by kfd and graphic operations, we must align the vm update mode to sdma, or cpu kmap will fail and cause null pointer issue. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 291977b93b1d..e105ff9e8041 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2239,6 +2239,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); + struct amdgpu_bo *bo = vm->root.bo; int r; r = amdgpu_bo_reserve(vm->root.bo, true); @@ -2265,6 +2266,10 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) /* Update VM state */ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); + + if (bo && !(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) + vm->use_cpu_for_update = false; + DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && -- 2.34.1
[PATCH] drm/amdgpu: set default num_kcq to 2 under sriov
The number of kernel queues has impact on the latency under sriov usecase. So to reduce the latency we set the default num_kcq = 2 under sriov if not set manually. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 1311e42ab8e9..d0ad7cb0fa05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -68,6 +68,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) /* enable mcbp for sriov */ amdgpu_mcbp = 1; + /* Reduce kcq number to 2 to reduce latency */ + if (amdgpu_num_kcq == -1) + amdgpu_num_kcq = 2; } void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, -- 2.25.1
[PATCH] drm/amdgpu: Enable mcbp under sriov by default
Enable mcbp under sriov by default. Asics with soc21 supports mcbp now so we should set it enabled. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 9dd474262c29..1311e42ab8e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -65,8 +65,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) adev->cg_flags = 0; adev->pg_flags = 0; - /* enable mcbp for sriov asic_type before soc21 */ - amdgpu_mcbp = (adev->asic_type < CHIP_IP_DISCOVERY) ? 1 : 0; + /* enable mcbp for sriov */ + amdgpu_mcbp = 1; } -- 2.25.1
[PATCH] drm/amdgpu: use sdma_v6 single packet invalidation
From: Pierre-Eric Pelloux-Prayer Send this patch on behalf of Pierre-Eric. This achieves the same result as the sequence used in emit_flush_gpu_tlb but the invalidation is now a single packet instead of the 3 packets required to implement reg_write_reg_wait. Signed-off-by: Pierre-Eric Pelloux-Prayer --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 23 ++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 3d36329be384..8d0c8f2cac06 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1175,7 +1175,28 @@ static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void sdma_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + struct amdgpu_vmhub *hub = >adev->vmhub[ring->funcs->vmhub]; + uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); + + /* Update the PD address for this VMID. */ + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), + lower_32_bits(pd_addr)); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), + upper_32_bits(pd_addr)); + + /* Trigger invalidation. */ + amdgpu_ring_write(ring, + SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) | + SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) | + SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f)); + amdgpu_ring_write(ring, req); + amdgpu_ring_write(ring, 0x); + amdgpu_ring_write(ring, + SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) | + SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F)); } static void sdma_v6_0_ring_emit_wreg(struct amdgpu_ring *ring, -- 2.40.0
[PATCH v2] drm/amdgpu: Force signal hw_fences that are embedded in non-sched jobs
v2: Add comments to clarify in the code. [Why] For engines not supporting soft reset, i.e. VCN, there will be a failed ib test before mode 1 reset during asic reset. The fences in this case are never signaled and next time when we try to free the sa_bo, kernel will hang. [How] During pre_asic_reset, driver will clear job fences and afterwards the fences' refcount will be reduced to 1. For drm_sched_jobs it will be released in job_free_cb, and for non-sched jobs like ib_test, it's meant to be released in sa_bo_free but only when the fences are signaled. So we have to force signal the non_sched bad job's fence during pre_asic_reset or the clear is not complete. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 8 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index faff4a3f96e6..ad7c5b70c35a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -673,6 +673,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) { int i; struct dma_fence *old, **ptr; + struct amdgpu_job *job; for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { ptr = >fence_drv.fences[i]; @@ -680,6 +681,13 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) if (old && old->ops == _job_fence_ops) { RCU_INIT_POINTER(*ptr, NULL); dma_fence_put(old); + /* For non-sched bad job, i.e. failed ib test, we need to force +* signal it right here or we won't be able to track them in fence drv +* and they will remain unsignaled during sa_bo free. +*/ + job = container_of(old, struct amdgpu_job, hw_fence); + if (!job->base.s_fence && !dma_fence_is_signaled(old)) + dma_fence_signal(old); } } } -- 2.25.1
[PATCH] drm/amdgpu: Force signal hw_fences that are embedded in non-sched jobs
[Why] For engines not supporting soft reset, i.e. VCN, there will be a failed ib test before mode 1 reset during asic reset. The fences in this case are never signaled and next time when we try to free the sa_bo, kernel will hang. [How] During pre_asic_reset, driver will clear job fences and afterwards the fences' refcount will be reduced to 1. For drm_sched_jobs it will be released in job_free_cb, and for non-sched jobs like ib_test, it's meant to be released in sa_bo_free but only when the fences are signaled. So we have to force signal the non_sched bad job's fence during pre_asic_reset or the clear is not complete. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index faff4a3f96e6..2e549bd50990 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -673,6 +673,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) { int i; struct dma_fence *old, **ptr; + struct amdgpu_job *job; for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { ptr = >fence_drv.fences[i]; @@ -680,6 +681,9 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) if (old && old->ops == _job_fence_ops) { RCU_INIT_POINTER(*ptr, NULL); dma_fence_put(old); + job = container_of(old, struct amdgpu_job, hw_fence); + if (!job->base.s_fence && !dma_fence_is_signaled(old)) + dma_fence_signal(old); } } } -- 2.25.1
[PATCH] drm/amdgpu: Add nv mailbox irq in soc21
Under virtualization guest needs to receive notification from host to perform reset in some cases. Add nv mailbox irq in soc21. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/soc21.c | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index c559f9bfc36d..41b94c824717 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -43,6 +43,7 @@ #include "soc15.h" #include "soc15_common.h" #include "soc21.h" +#include "mxgpu_nv.h" static const struct amd_ip_funcs soc21_common_ip_funcs; @@ -659,19 +660,31 @@ static int soc21_common_early_init(void *handle) return -EINVAL; } - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { amdgpu_virt_init_setting(adev); + xgpu_nv_mailbox_set_irq_funcs(adev); + } return 0; } static int soc21_common_late_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_get_irq(adev); + return 0; } static int soc21_common_sw_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_add_irq_id(adev); + return 0; } @@ -709,6 +722,9 @@ static int soc21_common_hw_fini(void *handle) /* disable the doorbell aperture */ soc21_enable_doorbell_aperture(adev, false); + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_put_irq(adev); + return 0; } -- 2.25.1
[PATCH] drm/amdgpu: Stop clearing kiq position during unload
Do not clear kiq position in RLC_CP_SCHEDULER so that CP could perform IDLE-SAVE after VF fini. CPG also needs to be active in save command. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 14 ++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 4 +++- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 9447999a3a48..232ccd639184 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4411,15 +4411,13 @@ static int gfx_v11_0_hw_fini(void *handle) amdgpu_mes_kiq_hw_fini(adev); } - if (amdgpu_sriov_vf(adev)) { - gfx_v11_0_cp_gfx_enable(adev, false); - /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */ - tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); - tmp &= 0xff00; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - + if (amdgpu_sriov_vf(adev)) + /* Remove the steps disabling CPG and clearing KIQ position, +* so that CP could perform IDLE-SAVE during switch. +* Those steps were to avoid a DMAR error in gfx9 but it is not reproduced on gfx11. +*/ return 0; - } + gfx_v11_0_cp_enable(adev, false); gfx_v11_0_enable_gui_idle_interrupt(adev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 7bfe862aa83e..02ad84a1526a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1248,7 +1248,9 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) if (adev->mes.ring.sched.ready) mes_v11_0_kiq_dequeue_sched(adev); - mes_v11_0_enable(adev, false); + if (!amdgpu_sriov_vf(adev)) + mes_v11_0_enable(adev, false); + return 0; } -- 2.25.1
[PATCH] drm/amdgpu: Disable GFX RAS feature for SRIOV case
In sriov guest side doesn't need init ras feature, so skip it. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 84a76c36d9a7..be8ed617e269 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4707,7 +4707,7 @@ static int gfx_v11_0_late_init(void *handle) if (r) return r; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) { + if (!amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) { r = gfx_v11_0_ras_late_init(handle); if (r) return r; -- 2.25.1
[PATCH] drm/amdgpu: skip mes self test for gc 11.0.3 in recover
Temporary disable mes self teset for gc 11.0.3 during gpu_recovery. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e0445e8cc342..5b8362727226 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5381,7 +5381,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, drm_sched_start(>sched, !tmp_adev->asic_reset_res); } - if (adev->enable_mes) + if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)) amdgpu_mes_self_test(tmp_adev); if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) { -- 2.25.1
[PATCH v3] drm/amdgpu: dequeue mes scheduler during fini
[Why] If mes is not dequeued during fini, mes will be in an uncleaned state during reload, then mes couldn't receive some commands which leads to reload failure. [How] Perform MES dequeue via MMIO after all the unmap jobs are done by mes and before kiq fini. v3: Move the dequeue operation inside kiq_hw_fini. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 42 -- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 1174dcc88db5..b477bed40d61 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1151,6 +1151,42 @@ static int mes_v11_0_sw_fini(void *handle) return 0; } +static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev) +{ + uint32_t data; + int i; + + mutex_lock(>srbm_mutex); + soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + } + data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 1); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0); + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(>srbm_mutex); + + adev->mes.ring.sched.ready = false; +} + static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) { uint32_t tmp; @@ -1202,6 +1238,9 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { + if (adev->mes.ring.sched.ready) + mes_v11_0_kiq_dequeue_sched(adev); + mes_v11_0_enable(adev, false); return 0; } @@ -1257,9 +1296,6 @@ static int mes_v11_0_hw_init(void *handle) static int mes_v11_0_hw_fini(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - adev->mes.ring.sched.ready = false; return 0; } -- 2.25.1
[PATCH v3] drm/amdgpu: dequeue mes scheduler during fini
Update: Remove redundant comments as Christian suggests. [Why] If mes is not dequeued during fini, mes will be in an uncleaned state during reload, then mes couldn't receive some commands which leads to reload failure. [How] Perform MES dequeue via MMIO after all the unmap jobs are done by mes and before kiq fini. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 3 ++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 ++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 41 +++-- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index ad980f4b66e1..ea8efa52503b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -130,6 +130,9 @@ struct amdgpu_mes { int (*kiq_hw_init)(struct amdgpu_device *adev); int (*kiq_hw_fini)(struct amdgpu_device *adev); + /* dequeue sched pipe via kiq */ + void(*kiq_dequeue_sched)(struct amdgpu_device *adev); + /* ip specific functions */ const struct amdgpu_mes_funcs *funcs; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 257b2e4de600..7c75758f58e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4406,6 +4406,9 @@ static int gfx_v11_0_hw_fini(void *handle) if (amdgpu_gfx_disable_kcq(adev)) DRM_ERROR("KCQ disable failed\n"); + if (adev->mes.ring.sched.ready && adev->mes.kiq_dequeue_sched) + adev->mes.kiq_dequeue_sched(adev); + amdgpu_mes_kiq_hw_fini(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index b48684db2832..eef29646b074 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -44,6 +44,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin"); static int mes_v11_0_hw_fini(void *handle); static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); +static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 @@ -1078,6 +1079,7 @@ static int mes_v11_0_sw_init(void *handle) adev->mes.funcs = _v11_0_funcs; adev->mes.kiq_hw_init = _v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = _v11_0_kiq_hw_fini; + adev->mes.kiq_dequeue_sched = _v11_0_kiq_dequeue_sched; r = amdgpu_mes_init(adev); if (r) @@ -1151,6 +1153,42 @@ static int mes_v11_0_sw_fini(void *handle) return 0; } +static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev) +{ + uint32_t data; + int i; + + mutex_lock(>srbm_mutex); + soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + } + data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 1); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0); + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(>srbm_mutex); + + adev->mes.ring.sched.ready = false; +} + static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) { uint32_t tmp; @@ -1257,9 +1295,6 @@ static int mes_v11_0_hw_init(void *handle) static int mes_v11_0_hw_fini(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - adev->mes.ring.sched.ready = false; return 0; } -- 2.25.1
[PATCH v2] drm/amdgpu: dequeue mes scheduler during fini
Resend to fix coding style issue. [Why] If mes is not dequeued during fini, mes will be in an uncleaned state during reload, then mes couldn't receive some commands which leads to reload failure. [How] Perform MES dequeue via MMIO after all the unmap jobs are done by mes and before kiq fini. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 3 ++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 ++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 47 +++-- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index ad980f4b66e1..ea8efa52503b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -130,6 +130,9 @@ struct amdgpu_mes { int (*kiq_hw_init)(struct amdgpu_device *adev); int (*kiq_hw_fini)(struct amdgpu_device *adev); + /* dequeue sched pipe via kiq */ + void(*kiq_dequeue_sched)(struct amdgpu_device *adev); + /* ip specific functions */ const struct amdgpu_mes_funcs *funcs; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 257b2e4de600..7c75758f58e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4406,6 +4406,9 @@ static int gfx_v11_0_hw_fini(void *handle) if (amdgpu_gfx_disable_kcq(adev)) DRM_ERROR("KCQ disable failed\n"); + if (adev->mes.ring.sched.ready && adev->mes.kiq_dequeue_sched) + adev->mes.kiq_dequeue_sched(adev); + amdgpu_mes_kiq_hw_fini(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index b48684db2832..837ff485dab6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -44,6 +44,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin"); static int mes_v11_0_hw_fini(void *handle); static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); +static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 @@ -1078,6 +1079,7 @@ static int mes_v11_0_sw_init(void *handle) adev->mes.funcs = _v11_0_funcs; adev->mes.kiq_hw_init = _v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = _v11_0_kiq_hw_fini; + adev->mes.kiq_dequeue_sched = _v11_0_kiq_dequeue_sched; r = amdgpu_mes_init(adev); if (r) @@ -1151,6 +1153,42 @@ static int mes_v11_0_sw_fini(void *handle) return 0; } +static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev) +{ + uint32_t data; + int i; + + mutex_lock(>srbm_mutex); + soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + } + data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 1); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0); + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(>srbm_mutex); + + adev->mes.ring.sched.ready = false; +} + static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) { uint32_t tmp; @@ -1257,9 +1295,12 @@ static int mes_v11_0_hw_init(void *handle) static int mes_v11_0_hw_fini(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - adev->mes.ring.sched.ready = false; + /* +* Do not disable mes.ring.sched.ready here, since mes +* is still used to unmap other rings. +* We'll set mes.ring.sched.ready to false when mes ring +* is dequeued. +*/ return 0; } -- 2.25.1
[PATCH] drm/amdgpu: dequeue mes scheduler during fini
[Why] If mes is not dequeued during fini, mes will be in an uncleaned state during reload, then mes couldn't receive some commands which leads to reload failure. [How] Perform MES dequeue via MMIO after all the unmap jobs are done by mes and before kiq fini. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 3 ++ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 ++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 47 +++-- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index ad980f4b66e1..6c9208e8f027 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -130,6 +130,9 @@ struct amdgpu_mes { int (*kiq_hw_init)(struct amdgpu_device *adev); int (*kiq_hw_fini)(struct amdgpu_device *adev); + /* dequeue sched pipe via kiq */ + void (*kiq_dequeue_sched)(struct amdgpu_device *adev); + /* ip specific functions */ const struct amdgpu_mes_funcs *funcs; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 257b2e4de600..7c75758f58e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4406,6 +4406,9 @@ static int gfx_v11_0_hw_fini(void *handle) if (amdgpu_gfx_disable_kcq(adev)) DRM_ERROR("KCQ disable failed\n"); + if (adev->mes.ring.sched.ready && adev->mes.kiq_dequeue_sched) + adev->mes.kiq_dequeue_sched(adev); + amdgpu_mes_kiq_hw_fini(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index b48684db2832..79fcf7c304cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -44,6 +44,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin"); static int mes_v11_0_hw_fini(void *handle); static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); +static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 @@ -1078,6 +1079,7 @@ static int mes_v11_0_sw_init(void *handle) adev->mes.funcs = _v11_0_funcs; adev->mes.kiq_hw_init = _v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = _v11_0_kiq_hw_fini; + adev->mes.kiq_dequeue_sched = _v11_0_kiq_dequeue_sched; r = amdgpu_mes_init(adev); if (r) @@ -1151,6 +1153,42 @@ static int mes_v11_0_sw_fini(void *handle) return 0; } +static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev) +{ + uint32_t data; + int i; + +mutex_lock(>srbm_mutex); + soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + } + data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 1); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0); + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(>srbm_mutex); + + adev->mes.ring.sched.ready = false; +} + static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) { uint32_t tmp; @@ -1257,9 +1295,12 @@ static int mes_v11_0_hw_init(void *handle) static int mes_v11_0_hw_fini(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - adev->mes.ring.sched.ready = false; + /* +* Do not disable mes.ring.sched.ready here, since mes +* is still used to unmap other rings. +* We'll set mes.ring.sched.ready to false when mes ring +* is dequeued. +*/ return 0; } -- 2.25.1
[PATCH] drm/amdgpu: Fix use-after-free in amdgpu_cs_ioctl
Hello, This patch is reviewed by Andrey and Christian and pushed into bringup temp branch. It need to be cherry-picked to drm-next, too. Does anyone has any comments on this patch? Thanks, Yubiao Wang [Why] In amdgpu_cs_ioctl, amdgpu_job_free could be performed ealier if there is -ERESTARTSYS error. In this case, job->hw_fence could be not initialized yet. Putting hw_fence during amdgpu_job_free could lead to a use-after-free warning. [How] Check if drm_sched_job_init is performed before job_free by checking s_fence. v2: Check hw_fence.ops instead since it could be NULL if fence is not initialized. Reverse the condition since !=NULL check is discouraged in kernel. Signed-off-by: YuBiao Wang Reviewed-by: Andrey Grodzovsky --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 8f51adf3b329..1062b7ed74ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -162,7 +162,10 @@ void amdgpu_job_free(struct amdgpu_job *job) amdgpu_sync_free(>sync); amdgpu_sync_free(>sched_sync); - dma_fence_put(>hw_fence); + if (!job->hw_fence.ops) + kfree(job); + else + dma_fence_put(>hw_fence); } int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, -- 2.25.1
[PATCH] drm/amd/amdgpu: Fix csb.bo pin_count leak on gfx 9
[Why] csb bo is not unpinned in gfx 9. It will lead to pin_count leak on driver unload. [How] Call bo_free_kernel corresponding to bo_create_kernel in gfx_rlc_init_csb. This will also unify the code path with other gfx versions. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 7f944bb11298..be803ebd543c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2462,7 +2462,9 @@ static int gfx_v9_0_sw_fini(void *handle) amdgpu_gfx_kiq_fini(adev); gfx_v9_0_mec_fini(adev); - amdgpu_bo_unref(>gfx.rlc.clear_state_obj); + amdgpu_bo_free_kernel(>gfx.rlc.clear_state_obj, + >gfx.rlc.clear_state_gpu_addr, + (void **)>gfx.rlc.cs_ptr); if (adev->flags & AMD_IS_APU) { amdgpu_bo_free_kernel(>gfx.rlc.cp_table_obj, >gfx.rlc.cp_table_gpu_addr, -- 2.25.1
[PATCH v2] drm/amd/amdgpu: Avoid writing GMC registers under sriov in gmc9
[Why] For Vega10, disabling gart of gfxhub could mess up KIQ and PSP under sriov mode, and lead to DMAR on host side. [How] Skip writing GMC registers under sriov. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 26 +--- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index bda1542ef1dd..f9a7349eb601 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -348,18 +348,20 @@ static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); - /* Setup TLB control */ - tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, - 0); - WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); - - /* Setup L2 cache */ - WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, 0); + if (!amdgpu_sriov_vf(adev)) { + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, + 0); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, 0); + } } /** -- 2.25.1
[PATCH] drm/amd/amdgpu: Avoid writing GMC registers under sriov in gmc9
[Why] For Vega10, disabling gart of gfxhub and mmhub could mess up KIQ and PSP under sriov mode, and lead to DMAR on host side. [How] Skip writing GMC registers under sriov. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 26 +--- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index bda1542ef1dd..f9a7349eb601 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -348,18 +348,20 @@ static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); - /* Setup TLB control */ - tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, - 0); - WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); - - /* Setup L2 cache */ - WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, 0); + if (!amdgpu_sriov_vf(adev)) { + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, + 0); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, 0); + } } /** -- 2.25.1
[PATCH] drm/amd/amdgpu: Avoid writing GMC registers under sriov in gmc9
[Why] For Vega10, disabling gart of gfxhub and mmhub could mess up KIQ and PSP under sriov mode, and lead to DMAR on host side. [How] Do not call gmc_gart_disable under sriov but keep vram_unpin to avoid pin_count leak. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index cb82404df534..365059a20ae8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1793,14 +1793,13 @@ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) { adev->gfxhub.funcs->gart_disable(adev); adev->mmhub.funcs->gart_disable(adev); - amdgpu_gart_table_vram_unpin(adev); } static int gmc_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gmc_v9_0_gart_disable(adev); + amdgpu_gart_table_vram_unpin(adev); if (amdgpu_sriov_vf(adev)) { /* full access mode, so don't touch any GMC register */ @@ -1808,6 +1807,7 @@ static int gmc_v9_0_hw_fini(void *handle) return 0; } + gmc_v9_0_gart_disable(adev); amdgpu_irq_put(adev, >gmc.ecc_irq, 0); amdgpu_irq_put(adev, >gmc.vm_fault, 0); -- 2.25.1
[PATCH] drm/amd/amdgpu: Do irq_fini_hw after ip_fini_early
Some IP such as SMU need irq_put to perform hw_fini. So move irq_fini_hw after ip_fini. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4c8f2f4647c0..18e26a78ef82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3864,10 +3864,10 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_ucode_sysfs_fini(adev); sysfs_remove_files(>dev->kobj, amdgpu_dev_attributes); - amdgpu_irq_fini_hw(adev); - amdgpu_device_ip_fini_early(adev); + amdgpu_irq_fini_hw(adev); + ttm_device_clear_dma_mappings(>mman.bdev); amdgpu_gart_dummy_page_fini(adev); -- 2.25.1
[PATCH] drm/amd/amdgpu: Add ready_to_reset resp for vega10
Send response to host after received the flr notification from host. Port NV change to vega10. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index ff2307d7ee0f..23b066bcffb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -258,6 +258,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) amdgpu_virt_fini_data_exchange(adev); atomic_set(>in_gpu_reset, 1); + xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); + do { if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) goto flr_done; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 50572635d0f8..bd3b23171579 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -37,6 +37,7 @@ enum idh_request { IDH_REQ_GPU_RESET_ACCESS, IDH_LOG_VF_ERROR = 200, + IDH_READY_TO_RESET = 201, }; enum idh_event { -- 2.25.1
[PATCH] drm/amd/amdgpu:flush ttm delayed work before cancel_sync
[Why] In some cases when we unload driver, warning call trace will show up in vram_mgr_fini which claims that LRU is not empty, caused by the ttm bo inside delay deleted queue. [How] We should flush delayed work to make sure the delay deleting is done. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4d266c40382c..0b5764aa98a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3824,8 +3824,10 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) { dev_info(adev->dev, "amdgpu: finishing device.\n"); flush_delayed_work(>delayed_init_work); - if (adev->mman.initialized) + if (adev->mman.initialized) { + flush_delayed_work(>mman.bdev.wq); ttm_bo_lock_delayed_workqueue(>mman.bdev); + } adev->shutdown = true; /* make sure IB test finished before entering exclusive mode -- 2.25.1
[PATCH] drm/amd/amdgpu: skip locking delayed work if not initialized.
When init failed in early init stage, amdgpu_object has not been initialized, so hasn't the ttm delayed queue functions. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9e53ff851496..4c33985542ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3825,7 +3825,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) { dev_info(adev->dev, "amdgpu: finishing device.\n"); flush_delayed_work(>delayed_init_work); - ttm_bo_lock_delayed_workqueue(>mman.bdev); + if (adev->mman.initialized) + ttm_bo_lock_delayed_workqueue(>mman.bdev); adev->shutdown = true; /* make sure IB test finished before entering exclusive mode -- 2.25.1
[PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay (v5)
[Why] GPU timing counters are read via KIQ under sriov, which will introduce a delay. [How] It could be directly read by MMIO. v2: Add additional check to prevent carryover issue. v3: Only check for carryover for once to prevent performance issue. v4: Add comments of the rough frequency where carryover happens. v5: Remove mutex and gfxoff ctrl unused with current timing registers. Signed-off-by: YuBiao Wang Acked-by: Horace Chen --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ff7e9f49040e..5f4eae9c9526 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7609,10 +7609,8 @@ static int gfx_v10_0_soft_reset(void *handle) static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) { - uint64_t clock; + uint64_t clock, clock_lo, clock_hi, hi_check; - amdgpu_gfx_off_ctrl(adev, false); - mutex_lock(>gfx.gpu_clock_mutex); switch (adev->asic_type) { case CHIP_VANGOGH: case CHIP_YELLOW_CARP: @@ -7620,12 +7618,19 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL); break; default: - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + /* The GFX clock frequency is 100MHz, which sets 32-bit carry over +* roughly every 42 seconds. +*/ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + clock_hi = hi_check; + } + clock = (uint64_t)clock_lo | ((uint64_t)clock_hi << 32ULL); break; } - mutex_unlock(>gfx.gpu_clock_mutex); - amdgpu_gfx_off_ctrl(adev, true); return clock; } -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay (v4)
[Why] GPU timing counters are read via KIQ under sriov, which will introduce a delay. [How] It could be directly read by MMIO. v2: Add additional check to prevent carryover issue. v3: Only check for carryover for once to prevent performance issue. v4: Add comments of the rough frequency where carryover happens. Signed-off-by: YuBiao Wang Acked-by: Horace Chen --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ff7e9f49040e..9355494002a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7609,7 +7609,7 @@ static int gfx_v10_0_soft_reset(void *handle) static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) { - uint64_t clock; + uint64_t clock, clock_lo, clock_hi, hi_check; amdgpu_gfx_off_ctrl(adev, false); mutex_lock(>gfx.gpu_clock_mutex); @@ -7620,8 +7620,15 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL); break; default: - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + /* Carryover happens every 4 Giga time cycles counts which is roughly 42 secs */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + clock_hi = hi_check; + } + clock = (uint64_t)clock_lo | ((uint64_t)clock_hi << 32ULL); break; } mutex_unlock(>gfx.gpu_clock_mutex); -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay
[Why] GPU timing counters are read via KIQ under sriov, which will introduce a delay. [How] It could be directly read by MMIO. v2: Add additional check to prevent carryover issue. v3: Only check for carryover for once to prevent performance issue. Signed-off-by: YuBiao Wang Acked-by: Horace Chen --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ff7e9f49040e..82a5b7ab8dc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7610,6 +7610,7 @@ static int gfx_v10_0_soft_reset(void *handle) static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; + uint64_t clock_lo, clock_hi, hi_check; amdgpu_gfx_off_ctrl(adev, false); mutex_lock(>gfx.gpu_clock_mutex); @@ -7620,8 +7621,15 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL); break; default: - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + // If carryover happens, update lower count again. + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + clock_hi = hi_check; + } + clock = (uint64_t)clock_lo | ((uint64_t)clock_hi << 32ULL); break; } mutex_unlock(>gfx.gpu_clock_mutex); -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay
[Why] GPU timing counters are read via KIQ under sriov, which will introduce a delay. [How] It could be directly read by MMIO. v2: Add additional check to prevent carryover issue. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ff7e9f49040e..191b9e3ee3ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7610,6 +7610,7 @@ static int gfx_v10_0_soft_reset(void *handle) static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; + uint64_t clock_lo, clock_hi, hi_check; amdgpu_gfx_off_ctrl(adev, false); mutex_lock(>gfx.gpu_clock_mutex); @@ -7620,8 +7621,16 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL); break; default: - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + // If carry happens, continuously read until no carry happens + while (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + clock_hi = hi_check; + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + } + clock = (uint64_t)clock_lo | ((uint64_t)clock_hi << 32ULL); break; } mutex_unlock(>gfx.gpu_clock_mutex); -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: reset psp ring wptr during ring_create
[Why] psp ring wptr is not initialized properly in ring_create, which would lead to psp failure after several gpu reset. [How] Set ring_wptr to zero in psp_ring_create. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 0fd1ed918627..3e6218799a0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -468,6 +468,7 @@ static int psp_v11_0_ring_create(struct psp_context *psp, struct amdgpu_device *adev = psp->adev; if (amdgpu_sriov_vf(adev)) { + ring->ring_wptr = 0; ret = psp_v11_0_ring_stop(psp, ring_type); if (ret) { DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 908664a5774b..be05d9cbd41e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -231,6 +231,7 @@ static int psp_v3_1_ring_create(struct psp_context *psp, psp_v3_1_reroute_ih(psp); if (amdgpu_sriov_vf(adev)) { + ring->ring_wptr = 0; ret = psp_v3_1_ring_stop(psp, ring_type); if (ret) { DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n"); -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/amdgpu: psp program IH_RB_CTRL on sienna_cichlid
[Why] IH_RB_CNTL is blocked by PSP so we need to ask psp to help config it. [How] Move psp ip block before ih, and use psp to program IH_RB_CNTL under sriov. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 20 ++-- drivers/gpu/drm/amd/amdgpu/nv.c| 12 +--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index f4e4040bbd25..5ee923ccdeb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -151,7 +151,15 @@ static int navi10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, /* enable_intr field is only valid in ring0 */ if (ih == >irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); - WREG32(ih_regs->ih_rb_cntl, tmp); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(>psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } if (enable) { ih->enabled = true; @@ -261,7 +269,15 @@ static int navi10_ih_enable_ring(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); } - WREG32(ih_regs->ih_rb_cntl, tmp); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(>psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } if (ih == >irq.ih) { /* set the ih ring 0 writeback address whether it's enabled or not */ diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 75d1f9b939b2..2ec5d4e1f363 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -764,9 +764,15 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: amdgpu_device_ip_block_add(adev, _common_ip_block); amdgpu_device_ip_block_add(adev, _v10_0_ip_block); - amdgpu_device_ip_block_add(adev, _ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, _v11_0_ip_block); + if (!amdgpu_sriov_vf(adev)) { + amdgpu_device_ip_block_add(adev, _ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, _v11_0_ip_block); + } else { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, _v11_0_ip_block); + amdgpu_device_ip_block_add(adev, _ih_ip_block); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, _v11_0_ip_block); -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/amdgpu: psp program IH_RB_CTRL on navi12 and sienna_cichlid
[Why] IH_RB_CNTL is blocked by PSP so we need to ask psp to help config it. [How] Move psp ip block before ih, and use psp to program IH_RB_CNTL under sriov. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 20 ++-- drivers/gpu/drm/amd/amdgpu/nv.c| 4 ++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index f4e4040bbd25..5ee923ccdeb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -151,7 +151,15 @@ static int navi10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, /* enable_intr field is only valid in ring0 */ if (ih == >irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); - WREG32(ih_regs->ih_rb_cntl, tmp); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(>psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } if (enable) { ih->enabled = true; @@ -261,7 +269,15 @@ static int navi10_ih_enable_ring(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); } - WREG32(ih_regs->ih_rb_cntl, tmp); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(>psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } if (ih == >irq.ih) { /* set the ih ring 0 writeback address whether it's enabled or not */ diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 75d1f9b939b2..aebd330daaca 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -742,8 +742,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) case CHIP_NAVI12: amdgpu_device_ip_block_add(adev, _common_ip_block); amdgpu_device_ip_block_add(adev, _v10_0_ip_block); - amdgpu_device_ip_block_add(adev, _ih_ip_block); amdgpu_device_ip_block_add(adev, _v11_0_ip_block); + amdgpu_device_ip_block_add(adev, _ih_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) amdgpu_device_ip_block_add(adev, _v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) @@ -764,9 +764,9 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: amdgpu_device_ip_block_add(adev, _common_ip_block); amdgpu_device_ip_block_add(adev, _v10_0_ip_block); - amdgpu_device_ip_block_add(adev, _ih_ip_block); if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, _v11_0_ip_block); + amdgpu_device_ip_block_add(adev, _ih_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, _v11_0_ip_block); -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/amdgpu: psp program IH_RB_CTRL on navi12 and navi21
[Why] IH_RB_CNTL is blocked by PSP so we need to ask psp to help config it. [How] Move psp ip block before ih, and use psp to program IH_RB_CNTL under sriov. Signed-off-by: YuBiao Wang --- drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 20 ++-- drivers/gpu/drm/amd/amdgpu/nv.c| 4 ++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index f4e4040bbd25..5ee923ccdeb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -151,7 +151,15 @@ static int navi10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, /* enable_intr field is only valid in ring0 */ if (ih == >irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); - WREG32(ih_regs->ih_rb_cntl, tmp); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(>psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } if (enable) { ih->enabled = true; @@ -261,7 +269,15 @@ static int navi10_ih_enable_ring(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); } - WREG32(ih_regs->ih_rb_cntl, tmp); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(>psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } if (ih == >irq.ih) { /* set the ih ring 0 writeback address whether it's enabled or not */ diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 75d1f9b939b2..aebd330daaca 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -742,8 +742,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) case CHIP_NAVI12: amdgpu_device_ip_block_add(adev, _common_ip_block); amdgpu_device_ip_block_add(adev, _v10_0_ip_block); - amdgpu_device_ip_block_add(adev, _ih_ip_block); amdgpu_device_ip_block_add(adev, _v11_0_ip_block); + amdgpu_device_ip_block_add(adev, _ih_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) amdgpu_device_ip_block_add(adev, _v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) @@ -764,9 +764,9 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: amdgpu_device_ip_block_add(adev, _common_ip_block); amdgpu_device_ip_block_add(adev, _v10_0_ip_block); - amdgpu_device_ip_block_add(adev, _ih_ip_block); if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, _v11_0_ip_block); + amdgpu_device_ip_block_add(adev, _ih_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, _v11_0_ip_block); -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx