Re: [PATCH] drm/amdgpu/virt: skip VM fault handler for VF (v2)
Thank you, Jerry. You can find more detailed discussion in previous thread. — Sincerely Yours, Pixel On 07/02/2017, 3:03 PM, "Zhang, Jerry"wrote: >If that info is enough for vf. >Reviewed-by: Junwei Zhang > >Regards, >Jerry (Junwei Zhang) > >Linux Base Graphics >SRDC Software Development >_ > > >> -Original Message- >> From: Pixel Ding [mailto:pixel.d...@amd.com] >> Sent: Tuesday, February 07, 2017 14:07 >> To: Zhou, David(ChunMing); Koenig, Christian; Zhang, Jerry >> Cc: amd-gfx@lists.freedesktop.org; Ding, Pixel >> Subject: [PATCH] drm/amdgpu/virt: skip VM fault handler for VF (v2) >> >> VF uses KIQ to access registers. When VM fault occurs, the driver can't get >> back >> the fence of KIQ submission and runs into CPU soft lockup. >> >> v2: print IV entry info >> >> Signed-off-by: Pixel Ding >> --- >> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 7 +++ >> 1 file changed, 7 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> index 7669b32..6502508 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> @@ -1237,6 +1237,13 @@ static int gmc_v8_0_process_interrupt(struct >> amdgpu_device *adev, { >> u32 addr, status, mc_client; >> >> +if (amdgpu_sriov_vf(adev)) { >> +dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", >> +entry->src_id, entry->src_data); >> +dev_err(adev->dev, " Can't decode VM fault info here on SRIOV >> VF\n"); >> +return 0; >> +} >> + >> addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); >> status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); >> mc_client = >> RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); >> -- >> 2.7.4 > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 07/20] drm/amdgpu:minor cleanup
Change-Id: Ia5ada3e9990261ca70b03655424e6290701cdb9d Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index fd29124..4029d32 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4864,10 +4864,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = >gfx.kiq; uint64_t eop_gpu_addr; - bool is_kiq = false; - - if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) - is_kiq = true; + bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); if (is_kiq) { eop_gpu_addr = kiq->eop_gpu_addr; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
答复: [PATCH 09/20] drm/amdgpu:implement SRIOV gpu_reset
patch 1-8 are some fixes for sriov gpu reset feature patch 9 -20 are for sriov gpu reset BR Monk 发件人: amd-gfx代表 Monk Liu 发送时间: 2017年2月7日 14:11:07 收件人: amd-gfx@lists.freedesktop.org 抄送: Liu, Monk 主题: [PATCH 09/20] drm/amdgpu:implement SRIOV gpu_reset Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 158 - drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + 2 files changed, 158 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e926f84..2b404ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1604,6 +1604,53 @@ int amdgpu_suspend(struct amdgpu_device *adev) return 0; } +static int amdgpu_resume_early(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) + r = adev->ip_blocks[i].version->funcs->resume(adev); + + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + } + + return 0; +} + +static int amdgpu_resume_late(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) + continue; + + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + } + + return 0; +} + static int amdgpu_resume(struct amdgpu_device *adev) { int i, r; @@ -2343,6 +2390,115 @@ static int 
amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, } /** + * amdgpu_sriov_gpu_reset - reset the asic + * + * @adev: amdgpu device pointer + * @voluntary: if this reset is requested by guest. + * (true means by guest and false means by HYPERVISOR ) + * + * Attempt the reset the GPU if it has hung (all asics). + * for SRIOV case. + * Returns 0 for success or an error on failure. + */ +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) +{ + int i, r = 0; + int resched; + struct amdgpu_bo *bo, *tmp; + struct amdgpu_ring *ring; + struct fence *fence = NULL, *next = NULL; + + mutex_lock(>virt.lock_reset); + atomic_inc(>gpu_reset_counter); + + /* block TTM */ + resched = ttm_bo_lock_delayed_workqueue(>mman.bdev); + + /* block scheduler */ + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + + kthread_park(ring->sched.thread); + amd_sched_hw_job_reset(>sched); + } + + /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ + amdgpu_fence_driver_force_completion(adev); + + /* request to take full control of GPU before re-initialization */ + if (voluntary) + amdgpu_virt_reset_gpu(adev); + else + amdgpu_virt_request_full_gpu(adev, true); + + + /* Resume IP prior to SMC */ + amdgpu_resume_early(adev); + + /* we need recover gart prior to run SMC/CP/SDMA resume */ + amdgpu_ttm_recover_gart(adev); + + /* now we are okay to resume SMC/CP/SDMA */ + amdgpu_resume_late(adev); + + amdgpu_irq_gpu_reset_resume_helper(adev); + + if (amdgpu_ib_ring_tests(adev)) + dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r); + + /* rellease full control of GPU after ib test */ + amdgpu_virt_release_full_gpu(adev, true); + + DRM_INFO("recover vram bo from shadow\n"); + + ring = adev->mman.buffer_funcs_ring; + mutex_lock(>shadow_list_lock); + list_for_each_entry_safe(bo, tmp, >shadow_list, shadow_list) { +
[PATCH] drm/amdgpu/virt: skip VM fault handler for VF (v2)
VF uses KIQ to access registers. When VM fault occurs, the driver can't get back the fence of KIQ submission and runs into CPU soft lockup. v2: print IV entry info Signed-off-by: Pixel Ding--- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 7669b32..6502508 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1237,6 +1237,13 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, { u32 addr, status, mc_client; + if (amdgpu_sriov_vf(adev)) { + dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", + entry->src_id, entry->src_data); + dev_err(adev->dev, " Can't decode VM fault info here on SRIOV VF\n"); + return 0; + } + addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 18/20] drm/amdgpu:alloc mqd backup
Change-Id: I84f821faa657a5d942c33d30f206eb66b579c2f8 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 10 ++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bdb47f7..a801fde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -781,6 +781,7 @@ struct amdgpu_mec { u32 num_pipe; u32 num_mec; u32 num_queue; + struct vi_mqd *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; }; struct amdgpu_kiq { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 6734e55..5f688d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -7309,6 +7309,11 @@ static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev) dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); return r; } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } /* create MQD for each KCQ */ @@ -7323,6 +7328,11 @@ static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev) dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); return r; } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[i]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 16/20] drm/amdgpu:RUNTIME flag should clr later
this flag will get cleared by request gpu access Change-Id: Ie484bb0141420055370e019dcd8c110fb34f8a1b Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 53fa590c..64d2fd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -507,9 +507,8 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); /* wait until RCV_MSG become 3 */ - if (!xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) - adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; - else { + if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) + { pr_err("failed to recieve FLR_CMPL\n"); return; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 20/20] drm/amdgpu:fix kiq_resume routine
use is_load_stage to fix compute ring test failure issue which occured after FLR/gpu_reset. we need backup a clean status of MQD which was created in drv load stage, and use it in resume stage, otherwise KCQ and KIQ all may faild in ring/ib test. Change-Id: I41be940454a6638e9a8a05f096601eaa1fbebaab Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 44 ++- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0ce00ff..4a641d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4877,24 +4877,46 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring, struct amdgpu_kiq *kiq = >gfx.kiq; uint64_t eop_gpu_addr; bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); + int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; if (is_kiq) { eop_gpu_addr = kiq->eop_gpu_addr; gfx_v8_0_kiq_setting(>ring); - } else + } else { eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + ring->queue * MEC_HPD_SIZE; + mqd_idx = ring - >gfx.compute_ring[0]; + } - mutex_lock(>srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + if (adev->is_load_stage) { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(>srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring); + if (is_kiq) + gfx_v8_0_kiq_init_register(adev, mqd, ring); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(>srbm_mutex); - gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring); + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); - if (is_kiq) - gfx_v8_0_kiq_init_register(adev, mqd, ring); - - vi_srbm_select(adev, 0, 0, 0, 0); - 
mutex_unlock(>srbm_mutex); + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + + if (is_kiq) { + mutex_lock(>srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_kiq_init_register(adev, mqd, ring); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(>srbm_mutex); + } + } if (is_kiq) gfx_v8_0_kiq_enable(ring); @@ -4913,9 +4935,9 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) ring = >gfx.kiq.ring; if (!amdgpu_bo_kmap(ring->mqd_obj, (void **)>mqd_ptr)) { - memset((void *)ring->mqd_ptr, 0, sizeof(struct vi_mqd)); r = gfx_v8_0_kiq_init_queue(ring, ring->mqd_ptr, ring->mqd_gpu_addr); amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; if (r) return r; } else { @@ -4925,9 +4947,9 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = >gfx.compute_ring[i]; if (!amdgpu_bo_kmap(ring->mqd_obj, (void **)>mqd_ptr)) { - memset((void *)ring->mqd_ptr, 0, sizeof(struct vi_mqd)); r = gfx_v8_0_kiq_init_queue(ring, ring->mqd_ptr, ring->mqd_gpu_addr); amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; if (r) return r; } else { -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 19/20] drm/amdgpu:use nop to clear ring buffer
this is for a fine GPU reset/resume, which should use nop clear ringbuffer prior to kickoff engine. and also use the same clear macro in ring_init. Change-Id: I7693891fd4431d64c025d052f1dd0ba797f2f0b7 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 7 +++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 1 + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 7bacf3c..37d8422 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -230,7 +230,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, dev_err(adev->dev, "(%d) ring create failed\n", r); return r; } - memset((void *)ring->ring, 0, ring->ring_size); + amdgpu_ring_clear_ring(ring); } ring->ptr_mask = (ring->ring_size / 4) - 1; ring->max_dw = max_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 0e57b04..3fd4ce8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -186,5 +186,12 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); void amdgpu_ring_fini(struct amdgpu_ring *ring); +static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) +{ + int i = 0; + while (i <= ring->ptr_mask) + ring->ring[i++] = ring->funcs->nop; + +} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5f688d4..0ce00ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4509,6 +4509,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) } /* start the ring */ + amdgpu_ring_clear_ring(ring); gfx_v8_0_cp_gfx_start(adev); ring->ready = true; r = 
amdgpu_ring_test_ring(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 9394ca6..d5206f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -615,6 +615,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { ring = >sdma.instance[i].ring; + amdgpu_ring_clear_ring(ring); wb_offset = (ring->rptr_offs * 4); mutex_lock(>srbm_mutex); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 12/20] drm/amdgpu:impl mm_rreg/wreg_nokiq
some registers are pf copy, and we can safely use mmio method to access them. and some time we are forbid to use kiq to access register like in INTR context. Change-Id: Ie6dc323dc86829a4a6ceb7073c269b106b534c4a Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 10 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 36 ++ 2 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 402a895..5dd0615 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1509,6 +1509,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, void amdgpu_device_fini(struct amdgpu_device *adev); int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); +uint32_t amdgpu_mm_rreg_nokiq(struct amdgpu_device *adev, uint32_t reg, + bool always_indirect); +void amdgpu_mm_wreg_nokiq(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + bool always_indirect); + uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, bool always_indirect); void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, @@ -1523,6 +1528,11 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); /* * Registers read & write functions. 
*/ +#define RREG32_nokiq(reg) amdgpu_mm_rreg_nokiq(adev, (reg), false) +#define RREG32_IDX_nokiq(reg) amdgpu_mm_rreg(adev, (reg), true) +#define WREG32_nokiq(reg, v) amdgpu_mm_wreg_nokiq(adev, (reg), (v), false) +#define WREG32_IDX_nokiq(reg, v) amdgpu_mm_wreg_nokiq(adev, (reg), (v), true) + #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false) #define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), true) #define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), false)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2b404ca..d5870d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -136,6 +136,42 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, } } +uint32_t amdgpu_mm_rreg_nokiq(struct amdgpu_device *adev, uint32_t reg, + bool always_indirect) +{ + uint32_t ret; + + if ((reg * 4) < adev->rmmio_size && !always_indirect) + ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); + else { + unsigned long flags; + + spin_lock_irqsave(>mmio_idx_lock, flags); + writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); + ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); + spin_unlock_irqrestore(>mmio_idx_lock, flags); + } + trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret); + return ret; +} + +void amdgpu_mm_wreg_nokiq(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + bool always_indirect) +{ + trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); + + if ((reg * 4) < adev->rmmio_size && !always_indirect) + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); + else { + unsigned long flags; + + spin_lock_irqsave(>mmio_idx_lock, flags); + writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); + writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); + spin_unlock_irqrestore(>mmio_idx_lock, flags); + } +} + u32 amdgpu_io_rreg(struct amdgpu_device 
*adev, u32 reg) { if ((reg * 4) < adev->rio_mem_size) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 17/20] drm/amdgpu:new field is_load_stage introduced
use it to seperate first driver load and later reset/resume Change-Id: I991e0da52ccd197716d279bf9014de46d39acfea Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5dd0615..bdb47f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1493,6 +1493,7 @@ struct amdgpu_device { /* link all gtt */ spinlock_t gtt_list_lock; struct list_headgtt_list; + boolis_load_stage; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d5870d0..5be0481 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1800,6 +1800,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->gc_cac_wreg = _invalid_wreg; adev->audio_endpt_rreg = _block_invalid_rreg; adev->audio_endpt_wreg = _block_invalid_wreg; + adev->is_load_stage = true; DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", @@ -2010,6 +2011,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, goto failed; } + adev->is_load_stage = false; return 0; failed: -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 14/20] drm/amdgpu:use nokiq version mm access
Change-Id: I383d7ce858a136d7b112180f86e3d632d37b4d1c Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 32 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 5fe4aad..4e9e0bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -321,12 +321,12 @@ static void xgpu_vi_mailbox_send_ack(struct amdgpu_device *adev) int timeout = VI_MAILBOX_TIMEDOUT; u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID); - reg = RREG32(mmMAILBOX_CONTROL); + reg = RREG32_nokiq(mmMAILBOX_CONTROL); reg = REG_SET_FIELD(reg, MAILBOX_CONTROL, RCV_MSG_ACK, 1); - WREG32(mmMAILBOX_CONTROL, reg); + WREG32_nokiq(mmMAILBOX_CONTROL, reg); /*Wait for RCV_MSG_VALID to be 0*/ - reg = RREG32(mmMAILBOX_CONTROL); + reg = RREG32_nokiq(mmMAILBOX_CONTROL); while (reg & mask) { if (timeout <= 0) { pr_err("RCV_MSG_VALID is not cleared\n"); @@ -335,7 +335,7 @@ static void xgpu_vi_mailbox_send_ack(struct amdgpu_device *adev) mdelay(1); timeout -=1; - reg = RREG32(mmMAILBOX_CONTROL); + reg = RREG32_nokiq(mmMAILBOX_CONTROL); } } @@ -343,10 +343,10 @@ static void xgpu_vi_mailbox_set_valid(struct amdgpu_device *adev, bool val) { u32 reg; - reg = RREG32(mmMAILBOX_CONTROL); + reg = RREG32_nokiq(mmMAILBOX_CONTROL); reg = REG_SET_FIELD(reg, MAILBOX_CONTROL, TRN_MSG_VALID, val ? 
1 : 0); - WREG32(mmMAILBOX_CONTROL, reg); + WREG32_nokiq(mmMAILBOX_CONTROL, reg); } static void xgpu_vi_mailbox_trans_msg(struct amdgpu_device *adev, @@ -356,10 +356,10 @@ static void xgpu_vi_mailbox_trans_msg(struct amdgpu_device *adev, xgpu_vi_mailbox_send_ack(adev); - reg = RREG32(mmMAILBOX_MSGBUF_TRN_DW0); + reg = RREG32_nokiq(mmMAILBOX_MSGBUF_TRN_DW0); reg = REG_SET_FIELD(reg, MAILBOX_MSGBUF_TRN_DW0, MSGBUF_DATA, event); - WREG32(mmMAILBOX_MSGBUF_TRN_DW0, reg); + WREG32_nokiq(mmMAILBOX_MSGBUF_TRN_DW0, reg); xgpu_vi_mailbox_set_valid(adev, true); } @@ -370,11 +370,11 @@ static int xgpu_vi_mailbox_rcv_msg(struct amdgpu_device *adev, u32 reg; u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID); - reg = RREG32(mmMAILBOX_CONTROL); + reg = RREG32_nokiq(mmMAILBOX_CONTROL); if (!(reg & mask)) return -ENOENT; - reg = RREG32(mmMAILBOX_MSGBUF_RCV_DW0); + reg = RREG32_nokiq(mmMAILBOX_MSGBUF_RCV_DW0); if (reg != event) return -ENOENT; @@ -390,7 +390,7 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev) u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, TRN_MSG_ACK); u32 reg; - reg = RREG32(mmMAILBOX_CONTROL); + reg = RREG32_nokiq(mmMAILBOX_CONTROL); while (!(reg & mask)) { if (timeout <= 0) { pr_err("Doesn't get ack from pf.\n"); @@ -400,7 +400,7 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev) msleep(1); timeout -= 1; - reg = RREG32(mmMAILBOX_CONTROL); + reg = RREG32_nokiq(mmMAILBOX_CONTROL); } return r; @@ -492,11 +492,11 @@ static int xgpu_vi_set_mailbox_ack_irq(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - u32 tmp = RREG32(mmMAILBOX_INT_CNTL); + u32 tmp = RREG32_nokiq(mmMAILBOX_INT_CNTL); tmp = REG_SET_FIELD(tmp, MAILBOX_INT_CNTL, ACK_INT_EN, (state == AMDGPU_IRQ_STATE_ENABLE) ? 
1 : 0); - WREG32(mmMAILBOX_INT_CNTL, tmp); + WREG32_nokiq(mmMAILBOX_INT_CNTL, tmp); return 0; } @@ -521,11 +521,11 @@ static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - u32 tmp = RREG32(mmMAILBOX_INT_CNTL); + u32 tmp = RREG32_nokiq(mmMAILBOX_INT_CNTL); tmp = REG_SET_FIELD(tmp, MAILBOX_INT_CNTL, VALID_INT_EN, (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0); - WREG32(mmMAILBOX_INT_CNTL, tmp); + WREG32_nokiq(mmMAILBOX_INT_CNTL, tmp); return 0; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 13/20] Refine handshake between guest and host by mailbox
From: Ken XueChange-Id: If3a7d05824847234759b86563e8052949e171972 Signed-off-by: Ken Xue --- drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 26 +- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index d2622b6..5fe4aad 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -318,10 +318,25 @@ void xgpu_vi_init_golden_registers(struct amdgpu_device *adev) static void xgpu_vi_mailbox_send_ack(struct amdgpu_device *adev) { u32 reg; + int timeout = VI_MAILBOX_TIMEDOUT; + u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID); reg = RREG32(mmMAILBOX_CONTROL); reg = REG_SET_FIELD(reg, MAILBOX_CONTROL, RCV_MSG_ACK, 1); WREG32(mmMAILBOX_CONTROL, reg); + + /*Wait for RCV_MSG_VALID to be 0*/ + reg = RREG32(mmMAILBOX_CONTROL); + while (reg & mask) { + if (timeout <= 0) { + pr_err("RCV_MSG_VALID is not cleared\n"); + break; + } + mdelay(1); + timeout -=1; + + reg = RREG32(mmMAILBOX_CONTROL); + } } static void xgpu_vi_mailbox_set_valid(struct amdgpu_device *adev, bool val) @@ -339,6 +354,8 @@ static void xgpu_vi_mailbox_trans_msg(struct amdgpu_device *adev, { u32 reg; + xgpu_vi_mailbox_send_ack(adev); + reg = RREG32(mmMAILBOX_MSGBUF_TRN_DW0); reg = REG_SET_FIELD(reg, MAILBOX_MSGBUF_TRN_DW0, MSGBUF_DATA, event); @@ -351,6 +368,11 @@ static int xgpu_vi_mailbox_rcv_msg(struct amdgpu_device *adev, enum idh_event event) { u32 reg; + u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID); + + reg = RREG32(mmMAILBOX_CONTROL); + if (!(reg & mask)) + return -ENOENT; reg = RREG32(mmMAILBOX_MSGBUF_RCV_DW0); if (reg != event) @@ -419,7 +441,9 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev, xgpu_vi_mailbox_set_valid(adev, false); /* start to check msg if request is idh_req_gpu_init_access */ - if (request == IDH_REQ_GPU_INIT_ACCESS) { + if (request == IDH_REQ_GPU_INIT_ACCESS || + request == IDH_REQ_GPU_FINI_ACCESS || + request == 
IDH_REQ_GPU_RESET_ACCESS) { r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); if (r) return r; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 11/20] drm/amdgpu:add lock_reset for SRIOV
this lock is used for sriov_gpu_reset, only get this mutex can run into sriov_gpu_reset. we have two cases triggers gpu_reset for SRIOV: 1) we have submit timedout and trigger reset voluntarily 2) hypervisor found world switch hang and trigger flr and notify we to do gpu reset. both cases need take care and we need a mutex to protect the consistency of reset routine. Change-Id: I37aabccfaef1cde32dc350062a519d32c9d51c02 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 82a70db..ac035ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -99,6 +99,7 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) adev->enable_virtual_display = true; mutex_init(>virt.lock_kiq); + mutex_init(>virt.lock_reset); } uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 7020ff2..4b05568 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -47,6 +47,7 @@ struct amdgpu_virt { bool chained_ib_support; uint32_treg_val_offs; struct mutexlock_kiq; + struct mutexlock_reset; struct amdgpu_irq_src ack_irq; struct amdgpu_irq_src rcv_irq; struct delayed_work flr_work; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 08/20] drm/amdgpu:divide KCQ mqd init into sw and hw
sw part only invoked once during sw_init. hw part invoked during first drv load and resume later. that way we cannot alloc mqd in hw/resume, we only keep mqd allocted in sw_init routine. and hw_init routine only kmap and set it. Change-Id: Ib0b788c71154e79819e8abb8daee9b9234a8eabb Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 107 +- 1 file changed, 42 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4029d32..6734e55 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2116,17 +2116,6 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - r = gfx_v8_0_kiq_init(adev); - if (r) { - DRM_ERROR("Failed to init KIQ BOs!\n"); - return r; - } - - kiq = >gfx.kiq; - r = gfx_v8_0_kiq_init_ring(adev, >ring, >irq); - if (r) - return r; - /* set up the gfx ring */ for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = >gfx.gfx_ring[i]; @@ -2169,6 +2158,24 @@ static int gfx_v8_0_sw_init(void *handle) return r; } + if (amdgpu_sriov_vf(adev)) { + r = gfx_v8_0_kiq_init(adev); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } + + kiq = >gfx.kiq; + r = gfx_v8_0_kiq_init_ring(adev, >ring, >irq); + if (r) + return r; + + /* create MQD for all compute queues as wel as KIQ for SRIOV case */ + r = gfx_v8_0_compute_mqd_soft_init(adev); + if (r) + return r; + } + /* reserve GDS, GWS and OA resource for gfx */ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, @@ -2210,9 +2217,13 @@ static int gfx_v8_0_sw_fini(void *handle) amdgpu_ring_fini(>gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(>gfx.compute_ring[i]); - gfx_v8_0_kiq_free_ring(>gfx.kiq.ring, >gfx.kiq.irq); - gfx_v8_0_kiq_fini(adev); + if (amdgpu_sriov_vf(adev)) { + gfx_v8_0_compute_mqd_soft_fini(adev); + gfx_v8_0_kiq_free_ring(>gfx.kiq.ring, >gfx.kiq.irq); + gfx_v8_0_kiq_fini(adev); 
+ } + gfx_v8_0_mec_fini(adev); gfx_v8_0_rlc_fini(adev); gfx_v8_0_free_microcode(adev); @@ -4892,70 +4903,37 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring, return 0; } -static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev) +static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) { struct amdgpu_ring *ring = NULL; - int i; + int r = 0, i; - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = >gfx.compute_ring[i]; - amdgpu_bo_free_kernel(>mqd_obj, NULL, NULL); - ring->mqd_obj = NULL; - } + gfx_v8_0_cp_compute_enable(adev, true); ring = >gfx.kiq.ring; - amdgpu_bo_free_kernel(>mqd_obj, NULL, NULL); - ring->mqd_obj = NULL; -} - -static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - struct vi_mqd *mqd; - u64 mqd_gpu_addr; - u32 *buf; - int r = 0; - - r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, >mqd_obj, - _gpu_addr, (void **)); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + if (!amdgpu_bo_kmap(ring->mqd_obj, (void **)>mqd_ptr)) { + memset((void *)ring->mqd_ptr, 0, sizeof(struct vi_mqd)); + r = gfx_v8_0_kiq_init_queue(ring, ring->mqd_ptr, ring->mqd_gpu_addr); + amdgpu_bo_kunmap(ring->mqd_obj); + if (r) + return r; + } else { return r; } - /* init the mqd struct */ - memset(buf, 0, sizeof(struct vi_mqd)); - mqd = (struct vi_mqd *)buf; - - r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr); - if (r) - return r; - - amdgpu_bo_kunmap(ring->mqd_obj); - - return 0; -} - -static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int r, i; - - ring = >gfx.kiq.ring; - r = gfx_v8_0_kiq_setup_queue(adev, ring); - if (r) - return r; - for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = >gfx.compute_ring[i]; - r =
[PATCH 10/20] drm/amdgpu:change kiq lock name
Change-Id: Ib11de11fb0a9e8086e542b932c9c62d5aa40ebb2 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 76ef641..82a70db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -98,7 +98,7 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; - mutex_init(>virt.lock); + mutex_init(>virt.lock_kiq); } uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) @@ -111,14 +111,14 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) BUG_ON(!ring->funcs->emit_rreg); - mutex_lock(>virt.lock); + mutex_lock(>virt.lock_kiq); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_hdp_flush(ring); amdgpu_ring_emit_rreg(ring, reg); amdgpu_ring_emit_hdp_invalidate(ring); amdgpu_fence_emit(ring, ); amdgpu_ring_commit(ring); - mutex_unlock(>virt.lock); + mutex_unlock(>virt.lock_kiq); r = fence_wait(f, false); if (r) @@ -139,14 +139,14 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) BUG_ON(!ring->funcs->emit_wreg); - mutex_lock(>virt.lock); + mutex_lock(>virt.lock_kiq); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_hdp_flush(ring); amdgpu_ring_emit_wreg(ring, reg, v); amdgpu_ring_emit_hdp_invalidate(ring); amdgpu_fence_emit(ring, ); amdgpu_ring_commit(ring); - mutex_unlock(>virt.lock); + mutex_unlock(>virt.lock_kiq); r = fence_wait(f, false); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 73d24df..7020ff2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -46,7 +46,7 @@ struct amdgpu_virt { uint64_tcsa_vmid0_addr; bool chained_ib_support; uint32_treg_val_offs; - 
struct mutexlock; + struct mutexlock_kiq; struct amdgpu_irq_src ack_irq; struct amdgpu_irq_src rcv_irq; struct delayed_work flr_work; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 09/20] drm/amdgpu:implement SRIOV gpu_reset
Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 158 - drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + 2 files changed, 158 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e926f84..2b404ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1604,6 +1604,53 @@ int amdgpu_suspend(struct amdgpu_device *adev) return 0; } +static int amdgpu_resume_early(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) + r = adev->ip_blocks[i].version->funcs->resume(adev); + + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + } + + return 0; +} + +static int amdgpu_resume_late(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) + continue; + + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + } + + return 0; +} + static int amdgpu_resume(struct amdgpu_device *adev) { int i, r; @@ -2343,6 +2390,115 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, } /** + * amdgpu_sriov_gpu_reset - reset the asic + * + * @adev: amdgpu device pointer + * @voluntary: if this reset is requested by guest. 
+ * (true means by guest and false means by HYPERVISOR ) + * + * Attempt the reset the GPU if it has hung (all asics). + * for SRIOV case. + * Returns 0 for success or an error on failure. + */ +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) +{ + int i, r = 0; + int resched; + struct amdgpu_bo *bo, *tmp; + struct amdgpu_ring *ring; + struct fence *fence = NULL, *next = NULL; + + mutex_lock(>virt.lock_reset); + atomic_inc(>gpu_reset_counter); + + /* block TTM */ + resched = ttm_bo_lock_delayed_workqueue(>mman.bdev); + + /* block scheduler */ + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + + kthread_park(ring->sched.thread); + amd_sched_hw_job_reset(>sched); + } + + /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ + amdgpu_fence_driver_force_completion(adev); + + /* request to take full control of GPU before re-initialization */ + if (voluntary) + amdgpu_virt_reset_gpu(adev); + else + amdgpu_virt_request_full_gpu(adev, true); + + + /* Resume IP prior to SMC */ + amdgpu_resume_early(adev); + + /* we need recover gart prior to run SMC/CP/SDMA resume */ + amdgpu_ttm_recover_gart(adev); + + /* now we are okay to resume SMC/CP/SDMA */ + amdgpu_resume_late(adev); + + amdgpu_irq_gpu_reset_resume_helper(adev); + + if (amdgpu_ib_ring_tests(adev)) + dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r); + + /* rellease full control of GPU after ib test */ + amdgpu_virt_release_full_gpu(adev, true); + + DRM_INFO("recover vram bo from shadow\n"); + + ring = adev->mman.buffer_funcs_ring; + mutex_lock(>shadow_list_lock); + list_for_each_entry_safe(bo, tmp, >shadow_list, shadow_list) { + amdgpu_recover_vram_from_shadow(adev, ring, bo, ); + if (fence) { + r = fence_wait(fence, false); + if (r) { + WARN(r, "recovery from shadow isn't completed\n"); + break; + } + } + +
[PATCH 06/20] drm/amdgpu:no need to use the SRIOV check
Change-Id: I9717e200be8af36f52d6305e02ffea178044c851 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 13 - 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index df1cfc5..fd29124 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1379,11 +1379,9 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, { int r = 0; - if (amdgpu_sriov_vf(adev)) { - r = amdgpu_wb_get(adev, >virt.reg_val_offs); - if (r) - return r; - } + r = amdgpu_wb_get(adev, >virt.reg_val_offs); + if (r) + return r; ring->adev = NULL; ring->ring_obj = NULL; @@ -1407,13 +1405,10 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, return r; } - static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq) { - if (amdgpu_sriov_vf(ring->adev)) - amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); - + amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); irq->data = NULL; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 01/20] drm/amdgpu:fix powerplay logic
1,like pp_hw_init, we shouldn't report error if PP disabled 2,disable pp_en if sriov Change-Id: I6d259f9609f223998bea236f64676b9c22133e4e Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 2 +- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 8856ecc..d56d200 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -43,7 +43,7 @@ static int amdgpu_create_pp_handle(struct amdgpu_device *adev) amd_pp = &(adev->powerplay); pp_init.chip_family = adev->family; pp_init.chip_id = adev->asic_type; - pp_init.pm_en = amdgpu_dpm != 0 ? true : false; + pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false; pp_init.feature_mask = amdgpu_pp_feature_mask; pp_init.device = amdgpu_cgs_create_device(adev); ret = amd_powerplay_create(_init, &(amd_pp->pp_handle)); diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 429f18b..e9cf207 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -286,7 +286,7 @@ static int pp_resume(void *handle) } if (ret1 == PP_DPM_DISABLED) - return ret1; + return 0; eventmgr = pp_handle->eventmgr; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 05/20] drm/amdgpu:bo_free_kernel will set ptr to NULL if freed
Change-Id: Iac592f1a6c927677008feabc1b7af6f18c580910 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0e2c906..df1cfc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1477,7 +1477,6 @@ static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) struct amdgpu_kiq *kiq = >gfx.kiq; amdgpu_bo_free_kernel(>eop_obj, >eop_gpu_addr, NULL); - kiq->eop_obj = NULL; } static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 04/20] drm/amdgpu:implement MQD soft init/fini
Change-Id: I650a78c8d27f76997e1ef6e3934d0d7e043d4715 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 52 +++ 1 file changed, 52 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 22bd155..0e2c906 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -659,6 +659,8 @@ static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr); static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr); +static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev); +static void gfx_v8_0_compute_mqd_soft_fini(struct amdgpu_device *adev); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -7322,3 +7324,53 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); amdgpu_ring_write_multiple(ring, (void *)_payload, cnt_de - 2); } + +/* create MQD for each compute queue */ +static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r, i; + + /* create MQD for KIQ */ + ring = >gfx.kiq.ring; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, >mqd_obj, + >mqd_gpu_addr, (void **)>mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + return r; + } + } + + /* create MQD for each KCQ */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) + { + ring = >gfx.compute_ring[i]; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, >mqd_obj, + >mqd_gpu_addr, (void **)>mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + return r; + } + } + } + + 
return 0; +} + +static void gfx_v8_0_compute_mqd_soft_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int i; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = >gfx.compute_ring[i]; + amdgpu_bo_free_kernel(>mqd_obj, >mqd_gpu_addr, (void **)>mqd_ptr); + } + + ring = >gfx.kiq.ring; + amdgpu_bo_free_kernel(>mqd_obj, >mqd_gpu_addr, (void **)>mqd_ptr); +} \ No newline at end of file -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 02/20] drm/amdgpu:cg & pg are not applied on VF
Change-Id: I93a4e97f1d24a289ab20c2a62371f3e303322587 Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 9 + drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 6 ++ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 6 ++ drivers/gpu/drm/amd/amdgpu/vi.c| 6 ++ 4 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0a75021..1e170ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5833,6 +5833,9 @@ static int gfx_v8_0_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_CARRIZO: case CHIP_STONEY: @@ -5890,6 +5893,9 @@ static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; + if (amdgpu_sriov_vf(adev)) + *flags = 0; + /* AMD_CG_SUPPORT_GFX_MGCG */ data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) @@ -6403,6 +6409,9 @@ static int gfx_v8_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_FIJI: case CHIP_CARRIZO: diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 7669b32..22c52d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1427,6 +1427,9 @@ static int gmc_v8_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_FIJI: fiji_update_mc_medium_grain_clock_gating(adev, @@ -1451,6 +1454,9 @@ static void gmc_v8_0_get_clockgating_state(void *handle, u32 *flags) struct amdgpu_device *adev = (struct amdgpu_device 
*)handle; int data; + if (amdgpu_sriov_vf(adev)) + *flags = 0; + /* AMD_CG_SUPPORT_MC_MGCG */ data = RREG32(mmMC_HUB_MISC_HUB_CG); if (data & MC_HUB_MISC_HUB_CG__ENABLE_MASK) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 25602c4..9394ca6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1512,6 +1512,9 @@ static int sdma_v3_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_FIJI: case CHIP_CARRIZO: @@ -1538,6 +1541,9 @@ static void sdma_v3_0_get_clockgating_state(void *handle, u32 *flags) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; + if (amdgpu_sriov_vf(adev)) + *flags = 0; + /* AMD_CG_SUPPORT_SDMA_MGCG */ data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[0]); if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK)) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 89b0dfe..aeef3c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1391,6 +1391,9 @@ static int vi_common_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_FIJI: vi_update_bif_medium_grain_light_sleep(adev, @@ -1435,6 +1438,9 @@ static void vi_common_get_clockgating_state(void *handle, u32 *flags) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; + if (amdgpu_sriov_vf(adev)) + *flags = 0; + /* AMD_CG_SUPPORT_BIF_LS */ data = RREG32_PCIE(ixPCIE_CNTL2); if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK) -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu/virt: skip VM fault handler for VF
Good point. — Sincerely Yours, Pixel On 07/02/2017, 1:54 PM, "Zhang, Jerry"wrote: >Maybe we can get the some useful info from IV entry. > >Regards, >Jerry (Junwei Zhang) > >Linux Base Graphics >SRDC Software Development >_ > > >> -Original Message- >> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of >> Pixel Ding >> Sent: Tuesday, February 07, 2017 13:49 >> To: Zhou, David(ChunMing); Koenig, Christian >> Cc: Ding, Pixel; amd-gfx@lists.freedesktop.org >> Subject: [PATCH] drm/amdgpu/virt: skip VM fault handler for VF >> >> VF uses KIQ to access registers. When VM fault occurs, the driver can't get >> back >> the fence of KIQ submission and runs into CPU soft lockup. >> >> Signed-off-by: Pixel Ding >> --- >> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 5 + >> 1 file changed, 5 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> index 7669b32..ff4fb37 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> @@ -1237,6 +1237,11 @@ static int gmc_v8_0_process_interrupt(struct >> amdgpu_device *adev, { >> u32 addr, status, mc_client; >> >> +if (amdgpu_sriov_vf(adev)) { >> +dev_err(adev->dev, "GPU fault detected on VF, can't access VM >> registers\n"); >> +return 0; >> +} >> + >> addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); >> status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); >> mc_client = >> RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); >> -- >> 2.7.4 >> >> ___ >> amd-gfx mailing list >> amd-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu/virt: skip VM fault handler for VF
Maybe we can get the some useful info from IV entry. Regards, Jerry (Junwei Zhang) Linux Base Graphics SRDC Software Development _ > -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of > Pixel Ding > Sent: Tuesday, February 07, 2017 13:49 > To: Zhou, David(ChunMing); Koenig, Christian > Cc: Ding, Pixel; amd-gfx@lists.freedesktop.org > Subject: [PATCH] drm/amdgpu/virt: skip VM fault handler for VF > > VF uses KIQ to access registers. When VM fault occurs, the driver can't get > back > the fence of KIQ submission and runs into CPU soft lockup. > > Signed-off-by: Pixel Ding> --- > drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > index 7669b32..ff4fb37 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > @@ -1237,6 +1237,11 @@ static int gmc_v8_0_process_interrupt(struct > amdgpu_device *adev, { > u32 addr, status, mc_client; > > + if (amdgpu_sriov_vf(adev)) { > + dev_err(adev->dev, "GPU fault detected on VF, can't access VM > registers\n"); > + return 0; > + } > + > addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); > status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); > mc_client = > RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); > -- > 2.7.4 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu/virt: skip VM fault handler for VF
VF uses KIQ to access registers. When VM fault occurs, the driver can't get back the fence of KIQ submission and runs into CPU soft lockup. Signed-off-by: Pixel Ding--- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 7669b32..ff4fb37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1237,6 +1237,11 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, { u32 addr, status, mc_client; + if (amdgpu_sriov_vf(adev)) { + dev_err(adev->dev, "GPU fault detected on VF, can't access VM registers\n"); + return 0; + } + addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu/virt: schedule work to handle vm fault for VF
Hi David/Christian, Tested the proposed method which prints warning only, there’s no problem. I will send another review because it’s a totally different change. — Sincerely Yours, Pixel On 06/02/2017, 4:56 PM, "Christian König"wrote: >Hi Pixel, > >yeah agree with David here, but even busy waiting for the KIQ register >read is not really an option in the interrupt handler. > >Additional to that when we have a VM fault we usually see a mass storm >of them. So allocating and scheduling a work item for each fault like >you do here will certainly only result in a whole system crash. > >For now I would say just use DRM_ERROR() to print a warning that a VM >fault happen and we can't decode it because we can't access the register >under SRIOV. > >Regards, >Christian. > >Am 06.02.2017 um 09:36 schrieb zhoucm1: >> Hi Pixel, >> I got your mean just now, since your VF must use KIQ to read/write >> registers, which use fence_wait to wait reading register completed. >> The alternative way is implementing a new kiq reading/writing register >> way by using udelay instead of fence wait when reading/writing >> register in interrupt context. >> >> Regards, >> David Zhou >> >> On 2017年02月06日 15:55, Ding, Pixel wrote: >>> Thanks you for your comments, David. I totally agree on your point. >>> >>> However, The VM fault status registers record the latest VM fault >>> info no matter when they’re changed, that’s what we care about since >>> we don’t handle too much for VM fault even in bare metal system. >>> >>> On the other hand, what do you think if we insist to sleep in the >>> interrupt context and let the driver runs into a software bug? >>> >>> The VM registers are shared among all VFs and we don’t have a copy >>> for each in hardware, I think there's no other way to access them in >>> this case. Do you have any suggestion? 
>>> >>> — >>> Sincerely Yours, >>> Pixel >>> >>> >>> >>> >>> >>> >>> >>> >>> On 06/02/2017, 3:33 PM, "Zhou, David(ChunMing)" >>> wrote: >>> INIT_WORK(>base, gmc_v8_0_vm_fault_sched); However VF is used or not, schedule work shouldn't handle registers reading for interrupt, especially for status register, which could have been changed when you handle it in schedule work after interrupt. Regards, David Zhou -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Pixel Ding Sent: Monday, February 06, 2017 3:00 PM To: amd-gfx@lists.freedesktop.org Cc: Ding, Pixel Subject: [PATCH 2/2] drm/amdgpu/virt: schedule work to handle vm fault for VF VF uses KIQ to access registers that invoking fence_wait to get the accessing completed. When VM fault occurs, the driver can't sleep in interrupt context. For some test cases, VM fault is 'legal' and shouldn't cause driver soft lockup. Signed-off-by: Pixel Ding --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 46 --- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 7669b32..75c913f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1231,9 +1231,9 @@ static int gmc_v8_0_vm_fault_interrupt_state(struct amdgpu_device *adev, return 0; } -static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) +static int gmc_v8_0_process_vm_fault(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) { u32 addr, status, mc_client; @@ -1262,6 +1262,46 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, return 0; } +struct gmc_vm_fault_work { +struct work_struct base; +struct amdgpu_device*adev; +struct amdgpu_irq_src *source; +struct amdgpu_iv_entry *entry; +}; + +static void gmc_v8_0_vm_fault_sched(struct work_struct *work) { +struct gmc_vm_fault_work 
*vm_work = +container_of(work, struct gmc_vm_fault_work, base); +struct amdgpu_device *adev = vm_work->adev; +struct amdgpu_irq_src *source = vm_work->source; +struct amdgpu_iv_entry *entry = vm_work->entry; + +gmc_v8_0_process_vm_fault(adev, source, entry); + +kfree(vm_work); +} + +static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) {
RE: [bug report] drm/amdgpu/virt: implement VI virt operation interfaces
Thanks! I'll fix it. Thanks! Xiangliang Yu > -Original Message- > From: Dan Carpenter [mailto:dan.carpen...@oracle.com] > Sent: Monday, February 06, 2017 7:19 PM > To: Yu, Xiangliang> Cc: amd-gfx@lists.freedesktop.org > Subject: [bug report] drm/amdgpu/virt: implement VI virt operation > interfaces > > Hello Xiangliang Yu, > > The patch ab71ac56f6d8: "drm/amdgpu/virt: implement VI virt operation > interfaces" from Jan 12, 2017, leads to the following static checker > warning: > > drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c:310 > xgpu_vi_init_golden_registers() > warn: string literals are always true. > > drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c >294 break; >295 case CHIP_TONGA: >296 amdgpu_program_register_sequence(adev, >297 > xgpu_tonga_mgcg_cgcg_init, >298 (const > u32)ARRAY_SIZE( >299 > xgpu_tonga_mgcg_cgcg_init)); >300 amdgpu_program_register_sequence(adev, >301 > xgpu_tonga_golden_settings_a11, >302 (const > u32)ARRAY_SIZE( >303 > xgpu_tonga_golden_settings_a11)); >304 amdgpu_program_register_sequence(adev, >305 > xgpu_tonga_golden_common_all, >306 (const > u32)ARRAY_SIZE( >307 > xgpu_tonga_golden_common_all)); >308 break; >309 default: >310 BUG_ON("Doesn't support chip type.\n"); > > BUG_ON() takes a condition argument, not a format string. > >311 break; >312 } >313 } > > regards, > dan carpenter ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amdgpu: don't clean the framebuffer for VF
Hi Christian, I think you mean loading KMS multiple times by “reuse”. At every time loading KMS, guest driver tells the host to clear FB during early init. I can’t see a case that fb_probe is invoked out of loading KMS, is there? Anyway I understand we don’t want to have many SRIOV conditional code paths. If I remove memset_io here and add GPU clear flag, should it be common logic or specific for VF? — Sincerely Yours, Pixel On 06/02/2017, 5:17 PM, "Koenig, Christian"wrote: >Hi Pixel, > >you don't seem to understand the reason for the clear here. > >It is completely irrelevant that the host is clearing the memory for the >guest, the problem is that the guest reuse the memory it got assigned >from the host multiple times. > >IIRC we added this because you could see leftovers of the slash screen >in the text console when the resolution wasn't a multiple of the >character height. > >Regards, >Christian. > >Am 06.02.2017 um 10:09 schrieb Ding, Pixel: >> Hi Christian, >> >> The underlying host driver clears VF’s framebuffer when guest driver shake >> hands with it, that is done before guest driver init. I think it’s >> unnecessary to clear FB again even with GPU for VF. >> >> — >> Sincerely Yours, >> Pixel >> >> >> >> >> >> On 06/02/2017, 4:49 PM, "Koenig, Christian" wrote: >> >>> Am 06.02.2017 um 07:24 schrieb Pixel Ding: The SRIOV host driver cleans framebuffer for each VF, guest driver needn't this action which costs much time on some virtualization platform, otherwise it might get timeout to initialize. 
Signed-off-by: Pixel Ding --- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 1e735c4..f1eb4f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -242,7 +242,9 @@ static int amdgpufb_create(struct drm_fb_helper *helper, /* setup helper */ rfbdev->helper.fb = fb; - memset_io(abo->kptr, 0x0, amdgpu_bo_size(abo)); + if (!amdgpu_sriov_vf(adev)) { + memset_io(abo->kptr, 0x0, amdgpu_bo_size(abo)); + } >>> Nit pick only, but coding style says to not use "{" "}" in an if without >>> else and only a single line of code. >>> >>> Additional to that I'm not sure if that's a good idea. The memory >>> allocated here might be already be used and so we need to clear it no >>> matter where it came from. >>> >>> It's probably easier to just set the AMDGPU_GEM_CREATE_VRAM_CLEARED in >>> the call to amdgpu_gem_object_create(). This makes the GPU clear the >>> memory before the first CPU access to it. >>> >>> Regards, >>> Christian. >>> strcpy(info->fix.id, "amdgpudrmfb"); >>> > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/7] drm/amd/display: HDMI YCbCr422 12bpc pixel format issue
From: Charlene LiuChange-Id: I7c7ff93d61b4bc29194ee0e4252be956721e7e9e Signed-off-by: Charlene Liu Reviewed-by: Tony Cheng Acked-by: Harry Wentland --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 7 - drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 30 ++-- .../gpu/drm/amd/display/dc/dce/dce_clock_source.c | 29 ++-- drivers/gpu/drm/amd/display/dc/dce/dce_opp.c | 32 +- .../drm/amd/display/dc/dce/dce_stream_encoder.c| 16 +++ .../drm/amd/display/dc/dce110/dce110_resource.c| 3 ++ drivers/gpu/drm/amd/display/dc/inc/clock_source.h | 2 +- drivers/gpu/drm/amd/display/dc/inc/hw/opp.h| 1 + 8 files changed, 76 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index ec5de329b091..2b929393f68a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1267,6 +1267,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) { struct core_stream *stream = pipe_ctx->stream; struct core_link *link = stream->sink->link; + enum dc_color_depth display_color_depth; if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) dal_ddc_service_write_scdc_data( @@ -1277,10 +1278,14 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) memset(>sink->link->public.cur_link_settings, 0, sizeof(struct dc_link_settings)); + display_color_depth = stream->public.timing.display_color_depth; + if (stream->public.timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) + display_color_depth = COLOR_DEPTH_888; + link->link_enc->funcs->enable_tmds_output( link->link_enc, pipe_ctx->clock_source->id, - stream->public.timing.display_color_depth, + display_color_depth, pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A, pipe_ctx->stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK, stream->phy_pix_clk); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 3d8a85e48b05..856a33ad8ec1 100644 --- 
a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1045,25 +1045,25 @@ static int get_norm_pix_clk(const struct dc_crtc_timing *timing) if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) pix_clk /= 2; - - switch (timing->display_color_depth) { - case COLOR_DEPTH_888: - normalized_pix_clk = pix_clk; - break; - case COLOR_DEPTH_101010: - normalized_pix_clk = (pix_clk * 30) / 24; + if (timing->pixel_encoding != PIXEL_ENCODING_YCBCR422) { + switch (timing->display_color_depth) { + case COLOR_DEPTH_888: + normalized_pix_clk = pix_clk; + break; + case COLOR_DEPTH_101010: + normalized_pix_clk = (pix_clk * 30) / 24; + break; + case COLOR_DEPTH_121212: + normalized_pix_clk = (pix_clk * 36) / 24; break; - case COLOR_DEPTH_121212: - normalized_pix_clk = (pix_clk * 36) / 24; + case COLOR_DEPTH_161616: + normalized_pix_clk = (pix_clk * 48) / 24; break; - case COLOR_DEPTH_161616: - normalized_pix_clk = (pix_clk * 48) / 24; - break; - default: - ASSERT(0); + default: + ASSERT(0); break; + } } - return normalized_pix_clk; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 78f43274a03a..a9f39218ce82 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -390,25 +390,24 @@ static bool pll_adjust_pix_clk( struct bp_adjust_pixel_clock_parameters bp_adjust_pixel_clock_params = { 0 }; enum bp_result bp_result; - switch (pix_clk_params->signal_type) { case SIGNAL_TYPE_HDMI_TYPE_A: { requested_clk_khz = pix_clk_params->requested_pix_clk; - - switch (pix_clk_params->color_depth) { - case COLOR_DEPTH_101010: - requested_clk_khz = (requested_clk_khz * 5) >> 2; - break; /* x1.25*/ - case COLOR_DEPTH_121212: -
[PATCH 7/7] drm/amd/display: Fix MST physical ports always disconnected
From: Krzysztof NowickiRemove a false assumption that a cached EDID will be present whenever the connector is in a connected state as this will only be true for logical MST ports. For physical ports the EDID will never be cached, which will cause them to always appear as disconnected. This reverts commit 4ff8a8de271bfb7750b2a5c68163848e2bf1 --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 8 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 6909dc80eb70..937558d1d7f4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -127,14 +127,6 @@ dm_dp_mst_detect(struct drm_connector *connector, bool force) >mst_mgr, aconnector->port); - /* -* we do not want to make this connector connected until we have edid on -* it -*/ - if (status == connector_status_connected && - !aconnector->port->cached_edid) - status = connector_status_disconnected; - return status; } -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 5/7] drm/amd/display: Fixed color temperature corruption.
From: Yongqiang SunChange-Id: I360090f0b9c9200a8ebc0ca3d75e370b80a8f6cc Signed-off-by: Yongqiang Sun Reviewed-by: Jordan Lazare Acked-by: Harry Wentland --- drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index b8735b27e9bc..dfe590198a33 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -65,7 +65,7 @@ struct pwl_result_data { }; struct pwl_params { - struct gamma_curve arr_curve_points[16]; + struct gamma_curve arr_curve_points[34]; struct curve_points arr_points[3]; struct pwl_result_data rgb_resulted[256 + 3]; uint32_t hw_points_num; -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 6/7] drm/amd/display: fix crash with modesetting driver
They might call commit with ACTION_NOTHING without a mode. We shouldn't crash but simply skip updating stream scaling settings since scaling obviously didn't change without a provided mode. Change-Id: Id787a2f3dd73eef3512cb35f48c8cd5842d22032 Signed-off-by: Harry WentlandReviewed-by: Andrey Grodzovsky Acked-by: Harry Wentland --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c index 865ee1ecce4f..c8a7b636a1b3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_types.c @@ -644,6 +644,10 @@ static void update_stream_scaling_settings( struct rect src = { 0 }; /* viewport in composition space*/ struct rect dst = { 0 }; /* stream addressable area */ + /* no mode. nothing to be done */ + if (!mode) + return; + /* Full screen scaling by default */ src.width = mode->hdisplay; src.height = mode->vdisplay; -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 0/7] DC Patches Feb 6, 2017
* Fix for some MST issues with hubs and HP docks * Fix for crash with modesetting driver * bunch of pixel format work in base DC Charlene Liu (2): drm/amd/display: Fix YCbCr pixel format shows green issue drm/amd/display: HDMI YCbCr422 12bpc pixel format issue Harry Wentland (1): drm/amd/display: fix crash with modesetting driver Hersen Wu (1): drm/amd/display: Clear test pattern when enabling stream Krzysztof Nowicki (1): drm/amd/display: Fix MST physical ports always disconnected Vitaly Prosyak (1): drm/amd/display: Adding 10 bpcc video P010 format Yongqiang Sun (1): drm/amd/display: Fixed color temperature corruption. .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c| 8 .../drm/amd/display/amdgpu_dm/amdgpu_dm_types.c| 4 ++ .../gpu/drm/amd/display/dc/calcs/bandwidth_calcs.c | 5 +++ drivers/gpu/drm/amd/display/dc/core/dc_link.c | 12 - drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 52 -- drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 5 ++- .../gpu/drm/amd/display/dc/dce/dce_clock_source.c | 29 ++-- drivers/gpu/drm/amd/display/dc/dce/dce_opp.c | 32 ++--- .../drm/amd/display/dc/dce/dce_stream_encoder.c| 16 --- .../drm/amd/display/dc/dce110/dce110_resource.c| 3 ++ drivers/gpu/drm/amd/display/dc/inc/clock_source.h | 2 +- drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 2 +- drivers/gpu/drm/amd/display/dc/inc/hw/opp.h| 1 + 13 files changed, 109 insertions(+), 62 deletions(-) -- 2.9.3 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/display: Fix MST physical ports always disconnected
We finally were able to give this patch a spin. It looks good, although we still have a ton of MST problems. Reviewed-by: Harry WentlandYou can find it on https://cgit.freedesktop.org/~hwentland/linux/log/?h=amd-staging-dc-4.9 Harry On 2017-01-27 12:58 PM, Krzysztof Nowicki wrote: > From: Krzysztof Nowicki > > Remove a false assumption that a cached EDID will be present whenever > the connector is in a connected state as this will only be true for > logical MST ports. For physical ports the EDID will never be cached, > which will cause them to always appear as disconnected. > > This reverts commit 4ff8a8de271bfb7750b2a5c68163848e2bf1 > --- > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 8 > 1 file changed, 8 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > index 6909dc8..937558d 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > @@ -127,14 +127,6 @@ dm_dp_mst_detect(struct drm_connector *connector, bool > force) > >mst_mgr, > aconnector->port); > > - /* > - * we do not want to make this connector connected until we have edid on > - * it > - */ > - if (status == connector_status_connected && > - !aconnector->port->cached_edid) > - status = connector_status_disconnected; > - > return status; > } > > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] Autodetect libdrm path (v2)
Apparently I missed the bottom of your reply (all of the clients I have outlook/gmail do top post only ...) as for the cmake changes. I'm for it if you want to submit a patch. I can't imagine a lot of cross compiling though since users will typically be using it on the platform they built it for. Ironically, I had the pkg_check originally but was told that's a faux-pas. Unless this is breaking for actual users though it's not really a priority to bikeshed the build system of a 30 file project that is meant to work only on developer machines who are likely building for themselves [] Tom From: amd-gfxon behalf of StDenis, Tom Sent: Monday, February 6, 2017 16:33 To: Emil Velikov; Tom St Denis Cc: amd-gfx mailing list Subject: Re: [PATCH] Autodetect libdrm path (v2) I have to NAK that idea since we use umr on NPI work which doesn't necessarily have libdrm support. Also umr can read/write registers via pci access without amdgpu loaded (handy if amdgpu fails to init properly). Though you are right that libdrm is technically a requirement and I should add that to the README. Tom From: Emil Velikov Sent: Monday, February 6, 2017 15:18 To: Tom St Denis Cc: amd-gfx mailing list; StDenis, Tom Subject: Re: [PATCH] Autodetect libdrm path (v2) Hi Tom, On 5 February 2017 at 22:24, Tom St Denis wrote: > (v2): Use findLibDRM script instead of directly finding path > Since the project already depends on libdrm you might want to use the drmDevice2 API and drop the iteration over all PCI devices (libpciaccess dependency). If the former is lacking something feel free to suggest/send patches ;-) The libdrm requirement was missing last time I've skimmed through the README. 
> Signed-off-by: Tom St Denis > --- > --- /dev/null > +++ b/cmake_modules/FindLibDRM.cmake > @@ -0,0 +1,35 @@ > +# Try to find libdrm > +# > +# Once done, this will define > +# > +# LIBDRM_FOUND > +# LIBDRM_INCLUDE_DIR > +# LIBDRM_LIBRARIES > + > +find_package(PkgConfig) > + > +pkg_check_modules(PC_LIBDRM QUIET libdrm) You really want a version check. > + > +find_path(LIBDRM_INCLUDE_DIR NAMES amdgpu_drm.h > +HINTS > +${PC_LIBDRM_INCLUDEDIR} > +${PC_LIBDRM_INCLUDE_DIRS} > +/usr/include > +) > + > +find_library(LIBDRM_LIBRARY NAMES libdrm_amdgpu.so.1 > +HINTS > +${PC_LIBDRM_LIBDIR} > +${PC_LIBDRM_LIBRARY_DIRS} > +/usr/lib64 > +/usr/lib > +) > + Here is one of the things which makes me rage against cmake - no picking /usr/{include,foo} is _not_ what you want. During cross-compilation as one is missing the .pc file, the system headers/libraries will be picked. This is horribly wrong, yet rather common in cmake world. What you really want is a lovely error message to warn the user - s/OPTIONAL/REQUIRED/ will give you just that. At which point, checking for the header/library in the paths provided by the .pc file is redundant and thus nearly everything else in the file ;-) TL;Dr: All you need is pkg_check_modules(PC_LIBDRM REQUIRED libdrm>=XX). Thanks Emil ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] Autodetect libdrm path (v2)
I have to NAK that idea since we use umr on NPI work which doesn't necessarily have libdrm support. Also umr can read/write registers via pci access without amdgpu loaded (handy if amdgpu fails to init properly). Though you are right that libdrm is technically a requirement and I should add that to the README. Tom From: Emil VelikovSent: Monday, February 6, 2017 15:18 To: Tom St Denis Cc: amd-gfx mailing list; StDenis, Tom Subject: Re: [PATCH] Autodetect libdrm path (v2) Hi Tom, On 5 February 2017 at 22:24, Tom St Denis wrote: > (v2): Use findLibDRM script instead of directly finding path > Since the project already depends on libdrm you might want to use the drmDevice2 API and drop the iteration over all PCI devices (libpciaccess dependency). If the former is lacking something feel free to suggest/send patches ;-) The libdrm requirement was missing last time I've skimmed through the README. > Signed-off-by: Tom St Denis > --- > --- /dev/null > +++ b/cmake_modules/FindLibDRM.cmake > @@ -0,0 +1,35 @@ > +# Try to find libdrm > +# > +# Once done, this will define > +# > +# LIBDRM_FOUND > +# LIBDRM_INCLUDE_DIR > +# LIBDRM_LIBRARIES > + > +find_package(PkgConfig) > + > +pkg_check_modules(PC_LIBDRM QUIET libdrm) You really want a version check. > + > +find_path(LIBDRM_INCLUDE_DIR NAMES amdgpu_drm.h > +HINTS > +${PC_LIBDRM_INCLUDEDIR} > +${PC_LIBDRM_INCLUDE_DIRS} > +/usr/include > +) > + > +find_library(LIBDRM_LIBRARY NAMES libdrm_amdgpu.so.1 > +HINTS > +${PC_LIBDRM_LIBDIR} > +${PC_LIBDRM_LIBRARY_DIRS} > +/usr/lib64 > +/usr/lib > +) > + Here is one of the things which makes me rage against cmake - no picking /usr/{include,foo} is _not_ what you want. During cross-compilation as one is missing the .pc file, the system headers/libraries will be picked. This is horribly wrong, yet rather common in cmake world. What you really want is a lovely error message to warn the user - s/OPTIONAL/REQUIRED/ will give you just that. 
At which point, checking for the header/library in the paths provided by the .pc file is redundant and thus nearly everything else in the file ;-) TL;Dr: All you need is pkg_check_modules(PC_LIBDRM REQUIRED libdrm>=XX). Thanks Emil ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: Change queue/pipe split between amdkfd and amdgpu
Hi Andres, Thank you for tackling this task. It's more involved than I expected, mostly because I didn't have much awareness of the MQD management in amdgpu. I made one comment in a separate message about the unified MQD commit function, if you want to bring that more in line with our latest ROCm release on github. Also, were you able to test the upstream KFD with your changes on a Kaveri or Carrizo? Regards, Felix On 17-02-03 11:51 PM, Andres Rodriguez wrote: > The current queue/pipe split policy is for amdgpu to take the first pipe of > MEC0 and leave the rest for amdkfd to use. This policy is taken as an > assumption in a few areas of the implementation. > > This patch series aims to allow for flexible/tunable queue/pipe split policies > between kgd and kfd. It also updates the queue/pipe split policy to one that > allows better compute app concurrency for both drivers. > > In the process some duplicate code and hardcoded constants were removed. > > Any suggestions or feedback on improvements welcome. > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] Autodetect libdrm path (v2)
Hi Tom, On 5 February 2017 at 22:24, Tom St Deniswrote: > (v2): Use findLibDRM script instead of directly finding path > Since the project already depends on libdrm you might want to use the drmDevice2 API and drop the iteration over all PCI devices (libpciaccess dependency). If the former is lacking something feel free to suggest/send patches ;-) The libdrm requirement was missing last time I've skimmed through the README. > Signed-off-by: Tom St Denis > --- > --- /dev/null > +++ b/cmake_modules/FindLibDRM.cmake > @@ -0,0 +1,35 @@ > +# Try to find libdrm > +# > +# Once done, this will define > +# > +# LIBDRM_FOUND > +# LIBDRM_INCLUDE_DIR > +# LIBDRM_LIBRARIES > + > +find_package(PkgConfig) > + > +pkg_check_modules(PC_LIBDRM QUIET libdrm) You really want a version check. > + > +find_path(LIBDRM_INCLUDE_DIR NAMES amdgpu_drm.h > +HINTS > +${PC_LIBDRM_INCLUDEDIR} > +${PC_LIBDRM_INCLUDE_DIRS} > +/usr/include > +) > + > +find_library(LIBDRM_LIBRARY NAMES libdrm_amdgpu.so.1 > +HINTS > +${PC_LIBDRM_LIBDIR} > +${PC_LIBDRM_LIBRARY_DIRS} > +/usr/lib64 > +/usr/lib > +) > + Here is one of the things which makes me rage against cmake - no picking /usr/{include,foo} is _not_ what you want. During cross-compilation as one is missing the .pc file, the system headers/libraries will be picked. This is horribly wrong, yet rather common in cmake world. What you really want is a lovely error message to warn the user - s/OPTIONAL/REQUIRED/ will give you just that. At which point, checking for the header/library in the paths provided by the .pc file is redundant and thus nearly everything else in the file ;-) TL;Dr: All you need is pkg_check_modules(PC_LIBDRM REQUIRED libdrm>=XX). Thanks Emil ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] Allow reading all of ring buffer and decode partial ranges
Fixes bug where say "-R gfx" would not actually dump the full ring contents. Adds feature where if you specify start and stop, e.g., "-R gfx[0:16]" will enable the decoder. Signed-off-by: Tom St Denis--- src/app/ring_read.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/app/ring_read.c b/src/app/ring_read.c index 1726573a0396..970310b9bf52 100644 --- a/src/app/ring_read.c +++ b/src/app/ring_read.c @@ -78,13 +78,14 @@ void umr_read_ring(struct umr_asic *asic, char *ringpath) drv_wptr = ring_data[2]<<2; /* default to reading entire ring */ + use_decoder = 0; if (!from[0]) { start = 0; - end = ringsize; + end = ringsize-4; } else { if (from[0] == '.' || !to[0] || to[0] == '.') { /* start from 32 words prior to rptr up to wptr */ - end = wptr+4; + end = wptr; if (rptr < (31*4)) { start = rptr - 31*4; start += ringsize; @@ -95,6 +96,7 @@ void umr_read_ring(struct umr_asic *asic, char *ringpath) } else { sscanf(from, "%"SCNu32, ); sscanf(to, "%"SCNu32, ); + use_decoder = 1; } } end %= ringsize; @@ -106,8 +108,7 @@ void umr_read_ring(struct umr_asic *asic, char *ringpath) asic->asicname, ringname, (unsigned long)wptr >> 2, asic->asicname, ringname, (unsigned long)drv_wptr >> 2); - use_decoder = 0; - while (start != end) { + do { value = ring_data[(start+12)>>2]; printf("%s.%s.ring[%4lu] == 0x%08lx ", asic->asicname, ringname, (unsigned long)start >> 2, (unsigned long)value); if (enable_decoder && start == rptr && start != wptr) { @@ -123,7 +124,7 @@ void umr_read_ring(struct umr_asic *asic, char *ringpath) printf("\n"); start += 4; start %= ringsize; - } + } while (start != ((end + 4) % ringsize)); free(ring_data); printf("\n"); -- 2.11.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 07/21] drm/amdgpu:fix gart table vram pin
I recall why I made this patch When testing SRIOV gpu reset feature, I it will always waiting and not return if without this patch, with more look into it: Because gpu_srio_reset (will send patch for this routine later) doesn't call amdgpu_suspend(), so the gart table BO won't get unpin, which lead to driver infinite wait loop if we pin it again in resume. For bare-metal case, gpu_reset will call amdgpu_suspend so the gart bo will unpin. BTW: GPU_SRIOV_RESET is invoked after HYPERVISOR call VF_FLR on this vf device, so all IP blocks's suspend routine is not needed at all. What about: >> +if (adev->gart.table_addr && amdgpu_sriov_vf(adev)) { >> +/* it's a resume call, gart already pin */ >> +return 0; >> +} BR Monk -Original Message- From: Christian König [mailto:deathsim...@vodafone.de] Sent: Monday, February 06, 2017 10:31 PM To: Liu, Monk; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 07/21] drm/amdgpu:fix gart table vram pin Hui? We shouldn't need to call this function from a GPU reset, do we really do so? But even if we call it from GPU reset we certainly should have called the matching unpin function before. Otherwise we certainly won't be able to resume from the next suspend after a GPU reset. Regards, Christian. Am 06.02.2017 um 15:25 schrieb Liu, Monk: > E looks like I missed the part of S3 function > > But if this is from a GPU reset , we also shouldn't continue run this > function otherwise GPU reset will fail (SRIOV reset test) > > BR Monk > > -Original Message- > From: Christian König [mailto:deathsim...@vodafone.de] > Sent: Monday, February 06, 2017 4:14 PM > To: Liu, Monk ; amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH 07/21] drm/amdgpu:fix gart table vram pin > > A bug NAK on this! amdgpu_gart_table_vram_unpin() must be called during > suspend. > > Otherwise the GART table can be corrupted and we run into a whole bunch of > problems. 
> > We could add a "BUG_ON(adev->gart.table_addr != NULL);" here to double check > that, but just ignoring that something went horrible wrong is clearly the > wrong approach. > > Regards, > Christian. > > Am 04.02.2017 um 11:34 schrieb Monk Liu: >> if this call is from resume, shouldn't enter pin logic at all >> >> Change-Id: I40a5cdc2a716c4c20d2812fd74ece4ea284b6765 >> Signed-off-by: Monk Liu >> --- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 5 + >>1 file changed, 5 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c >> index 964d2a9..5e907f7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c >> @@ -151,6 +151,11 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device >> *adev) >> uint64_t gpu_addr; >> int r; >> >> +if (adev->gart.table_addr) { >> +/* it's a resume call, gart already pin */ >> +return 0; >> +} >> + >> r = amdgpu_bo_reserve(adev->gart.robj, false); >> if (unlikely(r != 0)) >> return r; > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amd/display: fix array length error.
seems reasonable. but i think need to check num_levels can't be 0. in some case, there is only one level of mclk, and higher than the max validation clocks.. and will lead kernel panic. Best Regards Rex From: Wentland, Harry Sent: Monday, February 6, 2017 10:54:24 PM To: Zhu, Rex; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 1/2] drm/amd/display: fix array lenth error. On 2017-02-06 12:08 AM, Rex Zhu wrote: > Change-Id: I09011c5e6d5493db7e3d9a7ff7ab8c871a8db862 > Signed-off-by: Rex Zhu> --- > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c > index 5af27aa..50576c6 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c > @@ -358,7 +358,7 @@ bool dm_pp_get_clock_levels_by_type( > * non-boosted one. */ >DRM_INFO("DM_PPLIB: reducing engine clock > level from %d to %d\n", >dc_clks->num_levels, i + 1); > - dc_clks->num_levels = i; > + dc_clks->num_levels = i + 1; It seems to me the DRM_INFO print is wrong here, not the actual assignment. We're setting num_levels to the current index if the clocks for that index are higher than the max validation clocks, hence this index now should become num_levels. >break; >} >} > @@ -367,7 +367,7 @@ bool dm_pp_get_clock_levels_by_type( >if (dc_clks->clocks_in_khz[i] > > validation_clks.memory_max_clock) { >DRM_INFO("DM_PPLIB: reducing memory clock > level from %d to %d\n", >dc_clks->num_levels, i + 1); > - dc_clks->num_levels = i; > + dc_clks->num_levels = i + 1; >break; >} >} > Same comment as above. Harry ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 03/21] drm/amdgpu:fix scheduler hw reset
Thanks, I'll sort & cleanup my patches and send again. BR Monk -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Christian K?nig Sent: Monday, February 06, 2017 4:18 PM To: Zhou, David(ChunMing); Liu, Monk ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 03/21] drm/amdgpu:fix scheduler hw reset Am 06.02.2017 um 04:18 schrieb Zhou, David(ChunMing): > I'm curious what problem this patch fix? Any crash? > > My impression list_for will check if the list is empty, am I wrong? Yeah, I agree as well. list_for won't do anything if the list is empty. So this patch doesn't has any effect as far as I can see. Regards, Christian. > > Regards, > David Zhou > > -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of > Monk Liu > Sent: Saturday, February 04, 2017 6:22 PM > To: amd-gfx@lists.freedesktop.org > Cc: Liu, Monk > Subject: [PATCH 03/21] drm/amdgpu:fix scheduler hw reset > > Change-Id: I39e0b77029d22dc3fb37e2f19da699647ae96aad > Signed-off-by: Monk Liu > --- > drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 12 > 1 file changed, 12 insertions(+) > > diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > index ffe1f85..73dd5a7 100644 > --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > @@ -384,6 +384,13 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler > *sched) > struct amd_sched_job *s_job; > > spin_lock(>job_list_lock); > + s_job = list_first_entry_or_null(>ring_mirror_list, > + struct amd_sched_job, node); > + if (!s_job) { > + spin_unlock(>job_list_lock); > + return; > + } > + > list_for_each_entry_reverse(s_job, >ring_mirror_list, node) { > if (fence_remove_callback(s_job->s_fence->parent, > _job->s_fence->cb)) { > fence_put(s_job->s_fence->parent); > @@ -405,6 +412,11 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler > *sched) > if (s_job && sched->timeout != 
MAX_SCHEDULE_TIMEOUT) > schedule_delayed_work(_job->work_tdr, sched->timeout); > > + if (!s_job) { > + spin_unlock(>job_list_lock); > + return; > + } > + > list_for_each_entry_safe(s_job, tmp, >ring_mirror_list, node) { > struct amd_sched_fence *s_fence = s_job->s_fence; > struct fence *fence; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amd/display: fix array length error.
On 2017-02-06 12:08 AM, Rex Zhu wrote: > Change-Id: I09011c5e6d5493db7e3d9a7ff7ab8c871a8db862 > Signed-off-by: Rex Zhu> --- > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c > index 5af27aa..50576c6 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c > @@ -358,7 +358,7 @@ bool dm_pp_get_clock_levels_by_type( >* non-boosted one. */ > DRM_INFO("DM_PPLIB: reducing engine clock level > from %d to %d\n", > dc_clks->num_levels, i + 1); > - dc_clks->num_levels = i; > + dc_clks->num_levels = i + 1; It seems to me the DRM_INFO print is wrong here, not the actual assignment. We're setting num_levels to the current index if the clocks for that index are higher than the max validation clocks, hence this index now should become num_levels. > break; > } > } > @@ -367,7 +367,7 @@ bool dm_pp_get_clock_levels_by_type( > if (dc_clks->clocks_in_khz[i] > > validation_clks.memory_max_clock) { > DRM_INFO("DM_PPLIB: reducing memory clock level > from %d to %d\n", > dc_clks->num_levels, i + 1); > - dc_clks->num_levels = i; > + dc_clks->num_levels = i + 1; > break; > } > } > Same comment as above. Harry ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 07/21] drm/amdgpu:fix gart table vram pin
Hui? We shouldn't need to call this function from a GPU reset, do we really do so? But even if we call it from GPU reset we certainly should have called the matching unpin function before. Otherwise we certainly won't be able to resume from the next suspend after a GPU reset. Regards, Christian. Am 06.02.2017 um 15:25 schrieb Liu, Monk: E looks like I missed the part of S3 function But if this is from a GPU reset , we also shouldn't continue run this function otherwise GPU reset will fail (SRIOV reset test) BR Monk -Original Message- From: Christian König [mailto:deathsim...@vodafone.de] Sent: Monday, February 06, 2017 4:14 PM To: Liu, Monk; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 07/21] drm/amdgpu:fix gart table vram pin A bug NAK on this! amdgpu_gart_table_vram_unpin() must be called during suspend. Otherwise the GART table can be corrupted and we run into a whole bunch of problems. We could add a "BUG_ON(adev->gart.table_addr != NULL);" here to double check that, but just ignoring that something went horrible wrong is clearly the wrong approach. Regards, Christian. 
Am 04.02.2017 um 11:34 schrieb Monk Liu: if this call is from resume, shouldn't enter pin logic at all Change-Id: I40a5cdc2a716c4c20d2812fd74ece4ea284b6765 Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 964d2a9..5e907f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -151,6 +151,11 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev) uint64_t gpu_addr; int r; + if (adev->gart.table_addr) { + /* it's a resume call, gart already pin */ + return 0; + } + r = amdgpu_bo_reserve(adev->gart.robj, false); if (unlikely(r != 0)) return r; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[bug report] drm/amdgpu/virt: implement VI virt operation interfaces
Hello Xiangliang Yu, The patch ab71ac56f6d8: "drm/amdgpu/virt: implement VI virt operation interfaces" from Jan 12, 2017, leads to the following static checker warning: drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c:310 xgpu_vi_init_golden_registers() warn: string literals are always true. drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c 294 break; 295 case CHIP_TONGA: 296 amdgpu_program_register_sequence(adev, 297 xgpu_tonga_mgcg_cgcg_init, 298 (const u32)ARRAY_SIZE( 299 xgpu_tonga_mgcg_cgcg_init)); 300 amdgpu_program_register_sequence(adev, 301 xgpu_tonga_golden_settings_a11, 302 (const u32)ARRAY_SIZE( 303 xgpu_tonga_golden_settings_a11)); 304 amdgpu_program_register_sequence(adev, 305 xgpu_tonga_golden_common_all, 306 (const u32)ARRAY_SIZE( 307 xgpu_tonga_golden_common_all)); 308 break; 309 default: 310 BUG_ON("Doesn't support chip type.\n"); BUG_ON() takes a condition argument, not a format string. 311 break; 312 } 313 } regards, dan carpenter ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 07/21] drm/amdgpu:fix gart table vram pin
E looks like I missed the part of S3 function But if this is from a GPU reset , we also shouldn't continue run this function otherwise GPU reset will fail (SRIOV reset test) BR Monk -Original Message- From: Christian König [mailto:deathsim...@vodafone.de] Sent: Monday, February 06, 2017 4:14 PM To: Liu, Monk; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 07/21] drm/amdgpu:fix gart table vram pin A bug NAK on this! amdgpu_gart_table_vram_unpin() must be called during suspend. Otherwise the GART table can be corrupted and we run into a whole bunch of problems. We could add a "BUG_ON(adev->gart.table_addr != NULL);" here to double check that, but just ignoring that something went horrible wrong is clearly the wrong approach. Regards, Christian. Am 04.02.2017 um 11:34 schrieb Monk Liu: > if this call is from resume, shouldn't enter pin logic at all > > Change-Id: I40a5cdc2a716c4c20d2812fd74ece4ea284b6765 > Signed-off-by: Monk Liu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c > index 964d2a9..5e907f7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c > @@ -151,6 +151,11 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device > *adev) > uint64_t gpu_addr; > int r; > > + if (adev->gart.table_addr) { > + /* it's a resume call, gart already pin */ > + return 0; > + } > + > r = amdgpu_bo_reserve(adev->gart.robj, false); > if (unlikely(r != 0)) > return r; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 05/21] drm/amdgpu:BUG if gpu_reset and asic_reset from VF
Actually I have more patches for gpu_reset feature for SRIOV case, but I didn't send out them, so this patch can be skipped. I'll cleanup all those patches more and send out for review later BR Monk -Original Message- From: Christian König [mailto:deathsim...@vodafone.de] Sent: Monday, February 06, 2017 4:24 PM To: Liu, Monk; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 05/21] drm/amdgpu:BUG if gpu_reste and asic_reset from VF Am 04.02.2017 um 11:34 schrieb Monk Liu: > for SRIOV vf, Guest couldn't really access PCI registers so > gpu_reset() and asic_reset should be avoided. > > for suspend it could run for SRIOV because cg/pg routine already > modified for SRIOV vf case, besides we should remove the req/rel gpu > access around it because the req/rel should be used by invoker. > > Change-Id: I678d3f2ce202458c1d1d404970fa47421766b666 > Signed-off-by: Monk Liu Mhm, isn't a BUG_ON() a bit hard here? I mean that usually terminates the current process if not the brings down the whole system. Maybe just adding a WARN() to the "if (amdgpu_sriov_vf(adev)) return 0;" is more appropriate. Regards, Christian. 
> --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 + > drivers/gpu/drm/amd/amdgpu/vi.c| 2 ++ > 2 files changed, 3 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 6106cd6..173df73 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -1566,9 +1566,6 @@ int amdgpu_suspend(struct amdgpu_device *adev) > { > int i, r; > > - if (amdgpu_sriov_vf(adev)) > - amdgpu_virt_request_full_gpu(adev, false); > - > /* ungate SMC block first */ > r = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, >AMD_CG_STATE_UNGATE); > @@ -1597,9 +1594,6 @@ int amdgpu_suspend(struct amdgpu_device *adev) > } > } > > - if (amdgpu_sriov_vf(adev)) > - amdgpu_virt_release_full_gpu(adev, false); > - > return 0; > } > > @@ -2356,8 +2350,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) > int resched; > bool need_full_reset; > > - if (amdgpu_sriov_vf(adev)) > - return 0; > + BUG_ON(amdgpu_sriov_vf(adev)); > > if (!amdgpu_check_soft_reset(adev)) { > DRM_INFO("No hardware hang detected. Did some blocks > stall?\n"); > diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c > b/drivers/gpu/drm/amd/amdgpu/vi.c index 7810030..557994c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vi.c > +++ b/drivers/gpu/drm/amd/amdgpu/vi.c > @@ -739,6 +739,8 @@ static int vi_asic_reset(struct amdgpu_device *adev) > { > int r; > > + BUG_ON(amdgpu_sriov_vf(adev)); > + > amdgpu_atombios_scratch_regs_engine_hung(adev, true); > > r = vi_gpu_pci_config_reset(adev); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 04/21] drm/amdgpu:fix powerplay logic
Thanks, I hadn't tested the S3 feature yet so didn't run into it so far BR Monk -Original Message- From: Zhu, Rex Sent: Monday, February 06, 2017 8:32 PM To: Liu, Monk; amd-gfx@lists.freedesktop.org Cc: Liu, Monk Subject: RE: [PATCH 04/21] drm/amdgpu:fix powerplay logic Hi Monk, In static int pp_suspend(void *handle) { ret = pp_check(pp_handle); if (ret != 0) return ret; in suspend function, when dpm disabled/srv, also need to return 0. Best Regards Rex -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Monk Liu Sent: Saturday, February 04, 2017 6:22 PM To: amd-gfx@lists.freedesktop.org Cc: Liu, Monk Subject: [PATCH 04/21] drm/amdgpu:fix powerplay logic 1,like pp_hw_init, we shouldn't report error if PP disabled 2,disable pp_en if sriov Change-Id: I6d259f9609f223998bea236f64676b9c22133e4e Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 2 +- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 8856ecc..d56d200 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -43,7 +43,7 @@ static int amdgpu_create_pp_handle(struct amdgpu_device *adev) amd_pp = &(adev->powerplay); pp_init.chip_family = adev->family; pp_init.chip_id = adev->asic_type; - pp_init.pm_en = amdgpu_dpm != 0 ? true : false; + pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? 
true : +false; pp_init.feature_mask = amdgpu_pp_feature_mask; pp_init.device = amdgpu_cgs_create_device(adev); ret = amd_powerplay_create(_init, &(amd_pp->pp_handle)); diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 429f18b..e9cf207 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -286,7 +286,7 @@ static int pp_resume(void *handle) } if (ret1 == PP_DPM_DISABLED) - return ret1; + return 0; eventmgr = pp_handle->eventmgr; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 4/4] drm/amdgpu: extend profiling mode.
Please see in line. Best Regards Rex -Original Message- From: Andy Furniss [mailto:adf.li...@gmail.com] Sent: Monday, February 06, 2017 8:32 PM To: Zhu, Rex; Alex Deucher Cc: amd-gfx list Subject: Re: [PATCH 4/4] drm/amdgpu: extend profiling mode. Zhu, Rex wrote: > Sorry for the late response. > > Yes, I set the fan speed to max in this patch when user set high performance. > > Considering that: 1. set fan speed to max is helpful to let GPU run under > highest clock as long as possible. > 2. avoid GPU rapid temperature rise in some case. >>OK, I guess you know if it's needed or not. >>It is somewhat annoying noise wise, maybe one day there will be a way for >>users to set fan back on auto? Rex: sure, you can echo "2"> /sys/class/drm/card0/device/hwmon/hwmon3(maybe not 3)/pwm1_enable >>Accepting I don't know how the h/w works your point 2 would imply that with >>perf on auto doing something like running a benchmark that pegs all the >>clocks high should also Rex: based on the thermal, the fan speed.will be dynamically adjusted. >>I don't think that would be popular WRT noise. Rex: maybe I worried unnecessarily. I will change the code not to set fan speed to max. thanks. > > Best Regards > Rex > > > > -Original Message- > From: Andy Furniss [mailto:adf.li...@gmail.com] > Sent: Wednesday, February 01, 2017 11:37 PM > To: Alex Deucher > Cc: Zhu, Rex; amd-gfx list > Subject: Re: [PATCH 4/4] drm/amdgpu: extend profiling mode. > > Alex Deucher wrote: >> On Tue, Jan 31, 2017 at 6:19 PM, Andy Furnisswrote: >>> Andy Furniss wrote: Rex Zhu wrote: > > in profiling mode, powerplay will fix power state as stable as > possible.and disable gfx cg and LBPW feature. > > profile_standard: as a prerequisite, ensure power and thermal > sustainable, set clocks ratio as close to the highest clock ratio > as possible. 
> profile_min_sclk: fix mclk as profile_normal, set lowest sclk > profile_min_mclk: fix sclk as profile_normal, set lowest mclk > profile_peak: set highest sclk and mclk, power and thermal not > sustainable > profile_exit: exit profile mode. enable gfx cg/lbpw feature. Testing R9 285 Tonga on drm-next-4.11-wip This commit has the effect that doing echo high > /sys/class/drm/card0/device/power_dpm_force_performance_level instantly forces fan to (I guess) max, where normally it doesn't need anything like as fast with the clocks high when doing nothing else. >>> >>> >>> Ping - just in case this got missed, still the same on current >>> drm-next-4.11-wip >> >> Just a heads up, Rex was looking at this, but it's Chinese New Year this >> week. > > OK, thanks & Happy new year. > > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 04/21] drm/amdgpu:fix powerplay logic
Hi Monk, In static int pp_suspend(void *handle) { ret = pp_check(pp_handle); if (ret != 0) return ret; in suspend function, when dpm disabled/srv, also need to return 0. Best Regards Rex -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Monk Liu Sent: Saturday, February 04, 2017 6:22 PM To: amd-gfx@lists.freedesktop.org Cc: Liu, Monk Subject: [PATCH 04/21] drm/amdgpu:fix powerplay logic 1,like pp_hw_init, we shouldn't report error if PP disabled 2,disable pp_en if sriov Change-Id: I6d259f9609f223998bea236f64676b9c22133e4e Signed-off-by: Monk Liu--- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 2 +- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 8856ecc..d56d200 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -43,7 +43,7 @@ static int amdgpu_create_pp_handle(struct amdgpu_device *adev) amd_pp = &(adev->powerplay); pp_init.chip_family = adev->family; pp_init.chip_id = adev->asic_type; - pp_init.pm_en = amdgpu_dpm != 0 ? true : false; + pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? 
true : +false; pp_init.feature_mask = amdgpu_pp_feature_mask; pp_init.device = amdgpu_cgs_create_device(adev); ret = amd_powerplay_create(_init, &(amd_pp->pp_handle)); diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 429f18b..e9cf207 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -286,7 +286,7 @@ static int pp_resume(void *handle) } if (ret1 == PP_DPM_DISABLED) - return ret1; + return 0; eventmgr = pp_handle->eventmgr; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 4/4] drm/amdgpu: extend profiling mode.
Zhu, Rex wrote: Sorry for the late response. Yes, I set the fan speed to max in this patch when user set high performance. Considering that: 1. set fan speed to max is helpful to let GPU run under highest clock as long as possible. 2. avoid GPU rapid temperature rise in some case. OK, I guess you know if it's needed or not. It is somewhat annoying noise wise, maybe one day there will be a way for users to set fan back on auto? Accepting I don't know how the h/w works your point 2 would imply that with perf on auto doing something like running a benchmark that pegs all the clocks high should also instantly force fan high? I don't think that would be popular WRT noise. Best Regards Rex -Original Message- From: Andy Furniss [mailto:adf.li...@gmail.com] Sent: Wednesday, February 01, 2017 11:37 PM To: Alex Deucher Cc: Zhu, Rex; amd-gfx list Subject: Re: [PATCH 4/4] drm/amdgpu: extend profiling mode. Alex Deucher wrote: On Tue, Jan 31, 2017 at 6:19 PM, Andy Furnisswrote: Andy Furniss wrote: Rex Zhu wrote: in profiling mode, powerplay will fix power state as stable as possible.and disable gfx cg and LBPW feature. profile_standard: as a prerequisite, ensure power and thermal sustainable, set clocks ratio as close to the highest clock ratio as possible. profile_min_sclk: fix mclk as profile_normal, set lowest sclk profile_min_mclk: fix sclk as profile_normal, set lowest mclk profile_peak: set highest sclk and mclk, power and thermal not sustainable profile_exit: exit profile mode. enable gfx cg/lbpw feature. Testing R9 285 Tonga on drm-next-4.11-wip This commit has the effect that doing echo high > /sys/class/drm/card0/device/power_dpm_force_performance_level instantly forces fan to (I guess) max, where normally it doesn't need anything like as fast with the clocks high when doing nothing else. Ping - just in case this got missed, still the same on current drm-next-4.11-wip Just a heads up, Rex was looking at this, but it's Chinese New Year this week. 
OK, thanks & Happy new year. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 2/2] drm/amd/powerplay: refine code to avoid potential bug that the memory not cleared.
+ clocks->count = mclk_table->count Missing semicolon. Will fix in V2. Best Regards Rex -Original Message- From: Rex Zhu [mailto:rex@amd.com] Sent: Monday, February 06, 2017 1:09 PM To: amd-gfx@lists.freedesktop.org Cc: Zhu, Rex Subject: [PATCH 2/2] drm/amd/powerplay: refine code to avoid potential bug that the memory not cleared. Change-Id: If286d163cbabd8e9921a9d3cfcb71bb2b99aaceb Signed-off-by: Rex Zhu--- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 17 +++-- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 0a6c833..18f8ee7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -4397,16 +4397,14 @@ static int smu7_get_sclks(struct pp_hwmgr *hwmgr, struct amd_pp_clocks *clocks) if (table_info == NULL || table_info->vdd_dep_on_sclk == NULL) return -EINVAL; dep_sclk_table = table_info->vdd_dep_on_sclk; - for (i = 0; i < dep_sclk_table->count; i++) { + for (i = 0; i < dep_sclk_table->count; i++) clocks->clock[i] = dep_sclk_table->entries[i].clk; - clocks->count++; - } + clocks->count = dep_sclk_table->count; } else if (hwmgr->pp_table_version == PP_TABLE_V0) { sclk_table = hwmgr->dyn_state.vddc_dependency_on_sclk; - for (i = 0; i < sclk_table->count; i++) { + for (i = 0; i < sclk_table->count; i++) clocks->clock[i] = sclk_table->entries[i].clk; - clocks->count++; - } + clocks->count = sclk_table->count; } return 0; @@ -4440,14 +4438,13 @@ static int smu7_get_mclks(struct pp_hwmgr *hwmgr, struct amd_pp_clocks *clocks) clocks->clock[i] = dep_mclk_table->entries[i].clk; clocks->latency[i] = smu7_get_mem_latency(hwmgr, dep_mclk_table->entries[i].clk); - clocks->count++; } + clocks->count = dep_mclk_table->count; } else if (hwmgr->pp_table_version == PP_TABLE_V0) { mclk_table = hwmgr->dyn_state.vddc_dependency_on_mclk; - for (i = 0; i < mclk_table->count; i++) { + for (i = 0; i < 
mclk_table->count; i++) clocks->clock[i] = mclk_table->entries[i].clk; - clocks->count++; - } + clocks->count = mclk_table->count } return 0; } -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 4/4] drm/amdgpu: extend profiling mode.
Sorry for the late response. Yes, I set the fan speed to max in this patch when user set high performance. Considering that: 1. set fan speed to max is helpful to let GPU run under highest clock as long as possible. 2. avoid GPU rapid temperature rise in some case. Best Regards Rex -Original Message- From: Andy Furniss [mailto:adf.li...@gmail.com] Sent: Wednesday, February 01, 2017 11:37 PM To: Alex Deucher Cc: Zhu, Rex; amd-gfx list Subject: Re: [PATCH 4/4] drm/amdgpu: extend profiling mode. Alex Deucher wrote: > On Tue, Jan 31, 2017 at 6:19 PM, Andy Furnisswrote: >> Andy Furniss wrote: >>> >>> Rex Zhu wrote: in profiling mode, powerplay will fix power state as stable as possible.and disable gfx cg and LBPW feature. profile_standard: as a prerequisite, ensure power and thermal sustainable, set clocks ratio as close to the highest clock ratio as possible. profile_min_sclk: fix mclk as profile_normal, set lowest sclk profile_min_mclk: fix sclk as profile_normal, set lowest mclk profile_peak: set highest sclk and mclk, power and thermal not sustainable profile_exit: exit profile mode. enable gfx cg/lbpw feature. >>> >>> >>> Testing R9 285 Tonga on drm-next-4.11-wip >>> >>> This commit has the effect that doing >>> >>> echo high > >>> /sys/class/drm/card0/device/power_dpm_force_performance_level >>> >>> instantly forces fan to (I guess) max, where normally it doesn't >>> need anything like as fast with the clocks high when doing nothing else. >> >> >> Ping - just in case this got missed, still the same on current >> drm-next-4.11-wip > > Just a heads up, Rex was looking at this, but it's Chinese New Year this week. OK, thanks & Happy new year. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amdgpu: don't clean the framebuffer for VF
Hi Christian, The underlying host driver clears VF’s framebuffer when guest driver shake hands with it, that is done before guest driver init. I think it’s unnecessary to clear FB again even with GPU for VF. — Sincerely Yours, Pixel On 06/02/2017, 4:49 PM, "Koenig, Christian"wrote: >Am 06.02.2017 um 07:24 schrieb Pixel Ding: >> The SRIOV host driver cleans framebuffer for each VF, guest driver >> needn't this action which costs much time on some virtualization >> platform, otherwise it might get timeout to initialize. >> >> Signed-off-by: Pixel Ding >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 4 +++- >> 1 file changed, 3 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >> index 1e735c4..f1eb4f5 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >> @@ -242,7 +242,9 @@ static int amdgpufb_create(struct drm_fb_helper *helper, >> /* setup helper */ >> rfbdev->helper.fb = fb; >> >> -memset_io(abo->kptr, 0x0, amdgpu_bo_size(abo)); >> +if (!amdgpu_sriov_vf(adev)) { >> +memset_io(abo->kptr, 0x0, amdgpu_bo_size(abo)); >> +} > >Nit pick only, but coding style says to not use "{" "}" in an if without >else and only a single line of code. > >Additional to that I'm not sure if that's a good idea. The memory >allocated here might be already be used and so we need to clear it no >matter where it came from. > >It's probably easier to just set the AMDGPU_GEM_CREATE_VRAM_CLEARED in >the call to amdgpu_gem_object_create(). This makes the GPU clear the >memory before the first CPU access to it. > >Regards, >Christian. > >> >> strcpy(info->fix.id, "amdgpudrmfb"); >> > > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amdgpu: don't clean the framebuffer for VF
Hi Pixel, you don't seem to understand the reason for the clear here. It is completely irrelevant that the host is clearing the memory for the guest, the problem is that the guest reuses the memory it got assigned from the host multiple times. IIRC we added this because you could see leftovers of the splash screen in the text console when the resolution wasn't a multiple of the character height. Regards, Christian. Am 06.02.2017 um 10:09 schrieb Ding, Pixel: Hi Christian, The underlying host driver clears VF’s framebuffer when guest driver shake hands with it, that is done before guest driver init. I think it’s unnecessary to clear FB again even with GPU for VF. — Sincerely Yours, Pixel On 06/02/2017, 4:49 PM, "Koenig, Christian"wrote: Am 06.02.2017 um 07:24 schrieb Pixel Ding: The SRIOV host driver cleans framebuffer for each VF, guest driver needn't this action which costs much time on some virtualization platform, otherwise it might get timeout to initialize. Signed-off-by: Pixel Ding --- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 1e735c4..f1eb4f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -242,7 +242,9 @@ static int amdgpufb_create(struct drm_fb_helper *helper, /* setup helper */ rfbdev->helper.fb = fb; - memset_io(abo->kptr, 0x0, amdgpu_bo_size(abo)); + if (!amdgpu_sriov_vf(adev)) { + memset_io(abo->kptr, 0x0, amdgpu_bo_size(abo)); + } Nit pick only, but coding style says to not use "{" "}" in an if without else and only a single line of code. Additional to that I'm not sure if that's a good idea. The memory allocated here might be already be used and so we need to clear it no matter where it came from. It's probably easier to just set the AMDGPU_GEM_CREATE_VRAM_CLEARED in the call to amdgpu_gem_object_create(). 
This makes the GPU clear the memory before the first CPU access to it. Regards, Christian. strcpy(info->fix.id, "amdgpudrmfb"); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 4/6] drm/amdgpu: implement PRT for GFX6 v2
Hi all, We also need below for SI(gmc v6) support. {{{ diff --git a/drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/ index 0f6c6c8..7155312 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h @@ -11891,5 +11891,9 @@ #define VM_PRT_CNTL__L1_TLB_STORE_INVALID_ENTRIES__SHIFT 0x0003 #define VM_PRT_CNTL__L2_CACHE_STORE_INVALID_ENTRIES_MASK 0x0004L #define VM_PRT_CNTL__L2_CACHE_STORE_INVALID_ENTRIES__SHIFT 0x0002 +#define VM_PRT_CNTL__CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS_MASK 0x0001L +#define VM_PRT_CNTL__CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS__SHIFT 0x +#define VM_PRT_CNTL__TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS_MASK 0x0002L +#define VM_PRT_CNTL__TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS__SHIFT 0x0001 }}} Regards, Jerry (Junwei Zhang) Linux Base Graphics SRDC Software Development _ > -Original Message- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of > Nicolai H?hnle > Sent: Friday, February 03, 2017 22:37 > To: Christian König; amd-gfx@lists.freedesktop.org > Cc: b...@basnieuwenhuizen.nl > Subject: Re: [PATCH 4/6] drm/amdgpu: implement PRT for GFX6 v2 > > On 02.02.2017 11:25, Christian König wrote: > > From: Christian König> > > > Enable/disable the handling globally for now and print a warning when > > we enable it for the first time. 
> > > > v2: write to the correct register, adjust bits to that hw generation > > > > Signed-off-by: Christian König > > --- > > drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 55 > > +++ > > 1 file changed, 55 insertions(+) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > > b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > > index e2b0b16..b9b5c24 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > > @@ -398,6 +398,60 @@ static void gmc_v6_0_set_fault_enable_default(struct > amdgpu_device *adev, > > WREG32(mmVM_CONTEXT1_CNTL, tmp); > > } > > > > + /** > > + + * gmc_v8_0_set_prt - set PRT VM fault > > + + * > > + + * @adev: amdgpu_device pointer > > + + * @enable: enable/disable VM fault handling for PRT > > + +*/ > > +static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) > > +{ > > + u32 tmp; > > + > > + if (enable && !adev->mc.prt_warning) { > > + dev_warn(adev->dev, "Disabling VM faults because of PRT > request!\n"); > > + adev->mc.prt_warning = true; > > + } > > + > > + tmp = RREG32(mmVM_PRT_CNTL); > > + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, > > + CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS, > > + enable); > > I get: > > drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c: In function ‘gmc_v6_0_set_prt’: > drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c:419:27: error: > ‘VM_PRT_CNTL__CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS_MASK’ > undeclared (first use in this function) >tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, > ^ > and similar compiler errors here. The other patches compile fine. 
> > Nicolai > > > + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, > > + TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS, > > + enable); > > + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, > > + L2_CACHE_STORE_INVALID_ENTRIES, > > + enable); > > + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, > > + L1_TLB_STORE_INVALID_ENTRIES, > > + enable); > > + WREG32(mmVM_PRT_CNTL, tmp); > > + > > + if (enable) { > > + uint32_t low = AMDGPU_VA_RESERVED_SIZE >> > AMDGPU_GPU_PAGE_SHIFT; > > + uint32_t high = adev->vm_manager.max_pfn; > > + > > + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); > > + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); > > + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low); > > + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low); > > + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high); > > + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high); > > + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high); > > + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high); > > + } else { > > + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfff); > > + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfff); > > + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfff); > > + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfff); > > + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0); > > + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0); > > + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0); > > + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0); > > + } > > +} > > + > > static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) { > > int r, i; > > @@
Re: [PATCH 4/6] drm/amdgpu: implement PRT for GFX6 v2
Hi Jerry, thanks for the defines. I accidentally compiled the patch without SI support, so I didn't noted that they are missing. Going to integrate them and send out the patch set once more. Regards, Christian. Am 06.02.2017 um 09:51 schrieb Zhang, Jerry: Hi all, We also need below for SI(gmc v6) support. {{{ diff --git a/drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/ index 0f6c6c8..7155312 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h @@ -11891,5 +11891,9 @@ #define VM_PRT_CNTL__L1_TLB_STORE_INVALID_ENTRIES__SHIFT 0x0003 #define VM_PRT_CNTL__L2_CACHE_STORE_INVALID_ENTRIES_MASK 0x0004L #define VM_PRT_CNTL__L2_CACHE_STORE_INVALID_ENTRIES__SHIFT 0x0002 +#define VM_PRT_CNTL__CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS_MASK 0x0001L +#define VM_PRT_CNTL__CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS__SHIFT 0x +#define VM_PRT_CNTL__TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS_MASK 0x0002L +#define VM_PRT_CNTL__TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS__SHIFT 0x0001 }}} Regards, Jerry (Junwei Zhang) Linux Base Graphics SRDC Software Development _ -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Nicolai H?hnle Sent: Friday, February 03, 2017 22:37 To: Christian König; amd-gfx@lists.freedesktop.org Cc: b...@basnieuwenhuizen.nl Subject: Re: [PATCH 4/6] drm/amdgpu: implement PRT for GFX6 v2 On 02.02.2017 11:25, Christian König wrote: From: Christian KönigEnable/disable the handling globally for now and print a warning when we enable it for the first time. 
v2: write to the correct register, adjust bits to that hw generation Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 55 +++ 1 file changed, 55 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index e2b0b16..b9b5c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -398,6 +398,60 @@ static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, WREG32(mmVM_CONTEXT1_CNTL, tmp); } + /** + + * gmc_v8_0_set_prt - set PRT VM fault + + * + + * @adev: amdgpu_device pointer + + * @enable: enable/disable VM fault handling for PRT + +*/ +static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) +{ + u32 tmp; + + if (enable && !adev->mc.prt_warning) { + dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n"); + adev->mc.prt_warning = true; + } + + tmp = RREG32(mmVM_PRT_CNTL); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS, + enable); I get: drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c: In function ‘gmc_v6_0_set_prt’: drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c:419:27: error: ‘VM_PRT_CNTL__CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS_MASK’ undeclared (first use in this function) tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, ^ and similar compiler errors here. The other patches compile fine. 
Nicolai + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS, + enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + L2_CACHE_STORE_INVALID_ENTRIES, + enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + L1_TLB_STORE_INVALID_ENTRIES, + enable); + WREG32(mmVM_PRT_CNTL, tmp); + + if (enable) { + uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t high = adev->vm_manager.max_pfn; + + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high); + } else { + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfff); + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfff); + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfff); + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfff); + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0); + } +} +
Re: [PATCH 2/2] drm/amdgpu/virt: schedule work to handle vm fault for VF
Hi Pixel, yeah agree with David here, but even busy waiting for the KIQ register read is not really an option in the interrupt handler. Additional to that when we have a VM fault we usually see a mass storm of them. So allocating and scheduling a work item for each fault like you do here will certainly only result in a whole system crash. For now I would say just use DRM_ERROR() to print a warning that a VM fault happen and we can't decode it because we can't access the register under SRIOV. Regards, Christian. Am 06.02.2017 um 09:36 schrieb zhoucm1: Hi Pixel, I got your mean just now, since your VF must use KIQ to read/write registers, which use fence_wait to wait reading register completed. The alternative way is implementing a new kiq reading/writing register way by using udelay instead of fence wait when reading/writing register in interrupt context. Regards, David Zhou On 2017年02月06日 15:55, Ding, Pixel wrote: Thanks you for your comments, David. I totally agree on your point. However, The VM fault status registers record the latest VM fault info no matter when they’re changed, that’s what we care about since we don’t handle too much for VM fault even in bare metal system. On the other hand, what do you think if we insist to sleep in the interrupt context and let the driver runs into a software bug? The VM registers are shared among all VFs and we don’t have a copy for each in hardware, I think there's no other way to access them in this case. Do you have any suggestion? — Sincerely Yours, Pixel On 06/02/2017, 3:33 PM, "Zhou, David(ChunMing)"wrote: INIT_WORK(>base, gmc_v8_0_vm_fault_sched); However VF is used or not, schedule work shouldn't handle registers reading for interrupt, especially for status register, which could have been changed when you handle it in schedule work after interrupt. 
Regards, David Zhou -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Pixel Ding Sent: Monday, February 06, 2017 3:00 PM To: amd-gfx@lists.freedesktop.org Cc: Ding, Pixel Subject: [PATCH 2/2] drm/amdgpu/virt: schedule work to handle vm fault for VF VF uses KIQ to access registers that invoking fence_wait to get the accessing completed. When VM fault occurs, the driver can't sleep in interrupt context. For some test cases, VM fault is 'legal' and shouldn't cause driver soft lockup. Signed-off-by: Pixel Ding --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 46 --- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 7669b32..75c913f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1231,9 +1231,9 @@ static int gmc_v8_0_vm_fault_interrupt_state(struct amdgpu_device *adev, return 0; } -static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) +static int gmc_v8_0_process_vm_fault(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) { u32 addr, status, mc_client; @@ -1262,6 +1262,46 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, return 0; } +struct gmc_vm_fault_work { +struct work_struct base; +struct amdgpu_device*adev; +struct amdgpu_irq_src *source; +struct amdgpu_iv_entry *entry; +}; + +static void gmc_v8_0_vm_fault_sched(struct work_struct *work) { +struct gmc_vm_fault_work *vm_work = +container_of(work, struct gmc_vm_fault_work, base); +struct amdgpu_device *adev = vm_work->adev; +struct amdgpu_irq_src *source = vm_work->source; +struct amdgpu_iv_entry *entry = vm_work->entry; + +gmc_v8_0_process_vm_fault(adev, source, entry); + +kfree(vm_work); +} + +static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + 
struct amdgpu_iv_entry *entry) { +struct gmc_vm_fault_work *work = NULL; + +if (amdgpu_sriov_vf(adev)) { +work = kmalloc(sizeof(struct gmc_vm_fault_work), GFP_ATOMIC); +if (!work) +return -ENOMEM; +INIT_WORK(>base, gmc_v8_0_vm_fault_sched); +work->adev = adev; +work->source = source; +work->entry = entry; +return schedule_work(>base); +} + +return gmc_v8_0_process_vm_fault(adev, source, entry); } + static void fiji_update_mc_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amdgpu: don't clean the framebuffer for VF
Am 06.02.2017 um 07:24 schrieb Pixel Ding: The SRIOV host driver cleans framebuffer for each VF, guest driver needn't this action which costs much time on some virtualization platform, otherwise it might get timeout to initialize. Signed-off-by: Pixel Ding--- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 1e735c4..f1eb4f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -242,7 +242,9 @@ static int amdgpufb_create(struct drm_fb_helper *helper, /* setup helper */ rfbdev->helper.fb = fb; - memset_io(abo->kptr, 0x0, amdgpu_bo_size(abo)); + if (!amdgpu_sriov_vf(adev)) { + memset_io(abo->kptr, 0x0, amdgpu_bo_size(abo)); + } Nit pick only, but coding style says to not use "{" "}" in an if without else and only a single line of code. Additional to that I'm not sure if that's a good idea. The memory allocated here might be already be used and so we need to clear it no matter where it came from. It's probably easier to just set the AMDGPU_GEM_CREATE_VRAM_CLEARED in the call to amdgpu_gem_object_create(). This makes the GPU clear the memory before the first CPU access to it. Regards, Christian. strcpy(info->fix.id, "amdgpudrmfb"); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu/virt: schedule work to handle vm fault for VF
Hi Pixel, I got your mean just now, since your VF must use KIQ to read/write registers, which use fence_wait to wait reading register completed. The alternative way is implementing a new kiq reading/writing register way by using udelay instead of fence wait when reading/writing register in interrupt context. Regards, David Zhou On 2017年02月06日 15:55, Ding, Pixel wrote: Thanks you for your comments, David. I totally agree on your point. However, The VM fault status registers record the latest VM fault info no matter when they’re changed, that’s what we care about since we don’t handle too much for VM fault even in bare metal system. On the other hand, what do you think if we insist to sleep in the interrupt context and let the driver runs into a software bug? The VM registers are shared among all VFs and we don’t have a copy for each in hardware, I think there's no other way to access them in this case. Do you have any suggestion? — Sincerely Yours, Pixel On 06/02/2017, 3:33 PM, "Zhou, David(ChunMing)"wrote: INIT_WORK(>base, gmc_v8_0_vm_fault_sched); However VF is used or not, schedule work shouldn't handle registers reading for interrupt, especially for status register, which could have been changed when you handle it in schedule work after interrupt. Regards, David Zhou -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Pixel Ding Sent: Monday, February 06, 2017 3:00 PM To: amd-gfx@lists.freedesktop.org Cc: Ding, Pixel Subject: [PATCH 2/2] drm/amdgpu/virt: schedule work to handle vm fault for VF VF uses KIQ to access registers that invoking fence_wait to get the accessing completed. When VM fault occurs, the driver can't sleep in interrupt context. For some test cases, VM fault is 'legal' and shouldn't cause driver soft lockup. 
Signed-off-by: Pixel Ding --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 46 --- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 7669b32..75c913f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1231,9 +1231,9 @@ static int gmc_v8_0_vm_fault_interrupt_state(struct amdgpu_device *adev, return 0; } -static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) +static int gmc_v8_0_process_vm_fault(struct amdgpu_device *adev, +struct amdgpu_irq_src *source, +struct amdgpu_iv_entry *entry) { u32 addr, status, mc_client; @@ -1262,6 +1262,46 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, return 0; } +struct gmc_vm_fault_work { + struct work_struct base; + struct amdgpu_device*adev; + struct amdgpu_irq_src *source; + struct amdgpu_iv_entry *entry; +}; + +static void gmc_v8_0_vm_fault_sched(struct work_struct *work) { + struct gmc_vm_fault_work *vm_work = + container_of(work, struct gmc_vm_fault_work, base); + struct amdgpu_device *adev = vm_work->adev; + struct amdgpu_irq_src *source = vm_work->source; + struct amdgpu_iv_entry *entry = vm_work->entry; + + gmc_v8_0_process_vm_fault(adev, source, entry); + + kfree(vm_work); +} + +static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) { + struct gmc_vm_fault_work *work = NULL; + + if (amdgpu_sriov_vf(adev)) { + work = kmalloc(sizeof(struct gmc_vm_fault_work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + INIT_WORK(>base, gmc_v8_0_vm_fault_sched); + work->adev = adev; + work->source = source; + work->entry = entry; + return schedule_work(>base); + } + + return gmc_v8_0_process_vm_fault(adev, source, entry); } + static void fiji_update_mc_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { -- 2.7.4 ___ 
amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 13/13] drm/amdgpu: new queue policy, take first 2 queues of each pipe
Indeed a very nice set, only briefly looked over it and only found a small issue on patch #2. Apart from that the set is Acked-by: Christian König. Regards, Christian. Am 04.02.2017 um 13:08 schrieb Edward O'Callaghan: This series is, Reviewed-by: Edward O'Callaghan On 02/04/2017 03:51 PM, Andres Rodriguez wrote: Instead of taking the first pipe and givint the rest to kfd, take the s/givint/giving/ first 2 queues of each pipe. Effectively, amdgpu and amdkfd own the same number of queues. But because the queues are spread over multiple pipes the hardware will be able to better handle concurrent compute workloads. amdgpu goes from 1 pipe to 4 pipes, i.e. from 1 compute threads to 4 amdkfd goes from 3 pipe to 4 pipes, i.e. from 3 compute threads to 4 Signed-off-by: Andres Rodriguez --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 2218b65..da28174 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2810,7 +2810,7 @@ static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev) break; /* policy: amdgpu owns all queues in the first pipe */ - if (mec == 0 && pipe == 0) + if (mec == 0 && queue < 2) set_bit(i, adev->gfx.mec.queue_bitmap); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 04b4448..0a16cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1436,7 +1436,7 @@ static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev) break; /* policy: amdgpu owns all queues in the first pipe */ - if (mec == 0 && pipe == 0) + if (mec == 0 && queue < 2) set_bit(i, adev->gfx.mec.queue_bitmap); } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list 
amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 02/13] drm/amdgpu: doorbell registers need only be set once
Am 04.02.2017 um 05:51 schrieb Andres Rodriguez: The CP_MEC_DOORBELL_RANGE_* and CP_PQ_STATUS.DOORBELL_ENABLE registers are not HQD specific. They only need to be set once if at least 1 pipe requested doorbell support. Signed-off-by: Andres Rodriguez--- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 147ce0e..9740800 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1427,6 +1427,7 @@ struct amdgpu_device { unsignednum_rings; struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; boolib_pool_ready; + booldoorbell_enabled; Better put that into amdgpu_gfx and not amdgpu_device, cause that is a gfx (CP) specific state. Apart from that the patch looks good to me. Christian. struct amdgpu_sa_managerring_tmp_bo; /* interrupts */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index cf738e5..5d0e2c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4796,7 +4796,7 @@ static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable) { uint32_t tmp; - if (!enable) + if (!enable || adev->doorbell_enabled) return; if ((adev->asic_type == CHIP_CARRIZO) || @@ -4811,6 +4811,8 @@ static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable) tmp = RREG32(mmCP_PQ_STATUS); tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); WREG32(mmCP_PQ_STATUS, tmp); + + adev->doorbell_enabled = true; } static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd) @@ -5108,6 +5110,8 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) { int r; + adev->doorbell_enabled = false; + if (!(adev->flags & AMD_IS_APU)) gfx_v8_0_enable_gui_idle_interrupt(adev, false); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 05/21] drm/amdgpu:BUG if gpu_reset and asic_reset from VF
Am 04.02.2017 um 11:34 schrieb Monk Liu: for SRIOV vf, Guest couldn't really access PCI registers so gpu_reset() and asic_reset should be avoided. for suspend it could run for SRIOV because cg/pg routine already modified for SRIOV vf case, besides we should remove the req/rel gpu access around it because the req/rel should be used by invoker. Change-Id: I678d3f2ce202458c1d1d404970fa47421766b666 Signed-off-by: Monk LiuMhm, isn't a BUG_ON() a bit hard here? I mean that usually terminates the current process if not the brings down the whole system. Maybe just adding a WARN() to the "if (amdgpu_sriov_vf(adev)) return 0;" is more appropriate. Regards, Christian. --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 + drivers/gpu/drm/amd/amdgpu/vi.c| 2 ++ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6106cd6..173df73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1566,9 +1566,6 @@ int amdgpu_suspend(struct amdgpu_device *adev) { int i, r; - if (amdgpu_sriov_vf(adev)) - amdgpu_virt_request_full_gpu(adev, false); - /* ungate SMC block first */ r = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, AMD_CG_STATE_UNGATE); @@ -1597,9 +1594,6 @@ int amdgpu_suspend(struct amdgpu_device *adev) } } - if (amdgpu_sriov_vf(adev)) - amdgpu_virt_release_full_gpu(adev, false); - return 0; } @@ -2356,8 +2350,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) int resched; bool need_full_reset; - if (amdgpu_sriov_vf(adev)) - return 0; + BUG_ON(amdgpu_sriov_vf(adev)); if (!amdgpu_check_soft_reset(adev)) { DRM_INFO("No hardware hang detected. 
Did some blocks stall?\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 7810030..557994c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -739,6 +739,8 @@ static int vi_asic_reset(struct amdgpu_device *adev) { int r; + BUG_ON(amdgpu_sriov_vf(adev)); + amdgpu_atombios_scratch_regs_engine_hung(adev, true); r = vi_gpu_pci_config_reset(adev); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 03/21] drm/amdgpu:fix scheduler hw reset
Am 06.02.2017 um 04:18 schrieb Zhou, David(ChunMing): I'm curious what problem this patch fixes? Any crash? My impression list_for will check if the list is empty, am I wrong? Yeah, I agree as well. list_for won't do anything if the list is empty. So this patch doesn't have any effect as far as I can see. Regards, Christian. Regards, David Zhou -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Monk Liu Sent: Saturday, February 04, 2017 6:22 PM To: amd-gfx@lists.freedesktop.org Cc: Liu, MonkSubject: [PATCH 03/21] drm/amdgpu:fix scheduler hw reset Change-Id: I39e0b77029d22dc3fb37e2f19da699647ae96aad Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 12 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index ffe1f85..73dd5a7 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -384,6 +384,13 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched) struct amd_sched_job *s_job; spin_lock(>job_list_lock); + s_job = list_first_entry_or_null(>ring_mirror_list, +struct amd_sched_job, node); + if (!s_job) { + spin_unlock(>job_list_lock); + return; + } + list_for_each_entry_reverse(s_job, >ring_mirror_list, node) { if (fence_remove_callback(s_job->s_fence->parent, _job->s_fence->cb)) { fence_put(s_job->s_fence->parent); @@ -405,6 +412,11 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched) if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) schedule_delayed_work(_job->work_tdr, sched->timeout); + if (!s_job) { + spin_unlock(>job_list_lock); + return; + } + list_for_each_entry_safe(s_job, tmp, >ring_mirror_list, node) { struct amd_sched_fence *s_fence = s_job->s_fence; struct fence *fence; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 01/21] drm/amdgpu:fix typo
Am 04.02.2017 um 11:21 schrieb Monk Liu: Change-Id: I68729b1d32d5e300b8f03a923d2065d51dbe6f7a Signed-off-by: Monk LiuReviewed-by: Christian König for this one. --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 463a432..6106cd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2447,7 +2447,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) if (fence) { r = fence_wait(fence, false); if (r) { - WARN(r, "recovery from shadow isn't comleted\n"); + WARN(r, "recovery from shadow isn't completed\n"); break; } } @@ -2459,7 +2459,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) if (fence) { r = fence_wait(fence, false); if (r) - WARN(r, "recovery from shadow isn't comleted\n"); + WARN(r, "recovery from shadow isn't completed\n"); } fence_put(fence); } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 2/2] drm/amdgpu/virt: schedule work to handle vm fault for VF
INIT_WORK(>base, gmc_v8_0_vm_fault_sched); However VF is used or not, schedule work shouldn't handle registers reading for interrupt, especially for status register, which could have been changed when you handle it in schedule work after interrupt. Regards, David Zhou -Original Message- From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Pixel Ding Sent: Monday, February 06, 2017 3:00 PM To: amd-gfx@lists.freedesktop.org Cc: Ding, PixelSubject: [PATCH 2/2] drm/amdgpu/virt: schedule work to handle vm fault for VF VF uses KIQ to access registers that invoking fence_wait to get the accessing completed. When VM fault occurs, the driver can't sleep in interrupt context. For some test cases, VM fault is 'legal' and shouldn't cause driver soft lockup. Signed-off-by: Pixel Ding --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 46 --- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 7669b32..75c913f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1231,9 +1231,9 @@ static int gmc_v8_0_vm_fault_interrupt_state(struct amdgpu_device *adev, return 0; } -static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) +static int gmc_v8_0_process_vm_fault(struct amdgpu_device *adev, +struct amdgpu_irq_src *source, +struct amdgpu_iv_entry *entry) { u32 addr, status, mc_client; @@ -1262,6 +1262,46 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, return 0; } +struct gmc_vm_fault_work { + struct work_struct base; + struct amdgpu_device*adev; + struct amdgpu_irq_src *source; + struct amdgpu_iv_entry *entry; +}; + +static void gmc_v8_0_vm_fault_sched(struct work_struct *work) { + struct gmc_vm_fault_work *vm_work = + container_of(work, struct gmc_vm_fault_work, base); + struct amdgpu_device *adev = vm_work->adev; + struct amdgpu_irq_src 
*source = vm_work->source; + struct amdgpu_iv_entry *entry = vm_work->entry; + + gmc_v8_0_process_vm_fault(adev, source, entry); + + kfree(vm_work); +} + +static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) { + struct gmc_vm_fault_work *work = NULL; + + if (amdgpu_sriov_vf(adev)) { + work = kmalloc(sizeof(struct gmc_vm_fault_work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + INIT_WORK(>base, gmc_v8_0_vm_fault_sched); + work->adev = adev; + work->source = source; + work->entry = entry; + return schedule_work(>base); + } + + return gmc_v8_0_process_vm_fault(adev, source, entry); } + static void fiji_update_mc_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx