[PATCH v2 1/2] drm/amdgpu: Remove the sriov checking and add firmware checking
Refine the patch 1, and the lock about invalidate_lock. Unify bare metal and sriov, and add firmware checking for reg write and reg wait unify command. Signed-off-by: Emily Deng --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 59 - 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 53e9e2a..f172e92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -274,6 +274,8 @@ struct amdgpu_gfx { uint32_trlc_srls_feature_version; uint32_tmec_feature_version; uint32_tmec2_feature_version; + boolmec_fw_write_wait; + boolme_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; unsignednum_gfx_rings; struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 4e1e1a0..0cba430 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -482,6 +482,59 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); } +static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) +{ + adev->gfx.me_fw_write_wait = false; + adev->gfx.mec_fw_write_wait = false; + + switch (adev->asic_type) { + case CHIP_VEGA10: + if ((adev->gfx.me_fw_version >= 0x009c) && + (adev->gfx.me_feature_version >= 42) && + (adev->gfx.pfp_fw_version >= 0x00b1) && + (adev->gfx.pfp_feature_version >= 42)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x0193) && + (adev->gfx.mec_feature_version >= 42)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_VEGA12: + if ((adev->gfx.me_fw_version >= 0x009c) && + (adev->gfx.me_feature_version >= 44) && + (adev->gfx.pfp_fw_version >= 0x00b2) && + (adev->gfx.pfp_feature_version >= 44)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x0196) && + (adev->gfx.mec_feature_version >= 44)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_VEGA20: + if ((adev->gfx.me_fw_version >= 0x009c) && + (adev->gfx.me_feature_version >= 44) && + (adev->gfx.pfp_fw_version >= 0x00b2) && + (adev->gfx.pfp_feature_version >= 44)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x0197) && + (adev->gfx.mec_feature_version >= 44)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_RAVEN: + if ((adev->gfx.me_fw_version >= 0x009c) && + (adev->gfx.me_feature_version >= 42) && + (adev->gfx.pfp_fw_version >= 0x00b1) && + (adev->gfx.pfp_feature_version >= 42)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x0192) && + (adev->gfx.mec_feature_version >= 42)) + adev->gfx.mec_fw_write_wait = true; + break; + } +} + static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -716,6 +769,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) } out: + gfx_v9_0_check_fw_write_wait(adev); if (err) { dev_err(adev->dev, "gfx9: Failed to load firmware \"%s\"\n", @@ -4353,8 +4407,11 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, uint32_t ref, uint32_t mask) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + struct amdgpu_device *adev = ring->adev; + bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? + adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; - if (amdgpu_sriov_vf(ring->adev)) + if (fw_version_ok) gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, ref, mask, 0x20); else -- 2.7.4 ___ amd-gfx
[PATCH v2 2/2] drm/amdgpu: use kiq to do invalidate tlb
To avoid the tlb flush not interrupted by world switch, use kiq and one command to do tlb invalidate. v2: Refine the invalidate lock position. Signed-off-by: Emily Deng --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 -- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 74 +--- 3 files changed, 71 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6265b88..19ef7711 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_LAST }; +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ +#define MAX_KIQ_REG_TRY 20 + int amdgpu_device_ip_set_clockgating_state(void *dev, enum amd_ip_block_type block_type, enum amd_clockgating_state state); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 21adb1b6..3885636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,9 +22,6 @@ */ #include "amdgpu.h" -#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ -#define MAX_KIQ_REG_TRY 20 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index ed467de..0bf8439 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -311,6 +311,58 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) return req; } +signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = >gfx.kiq; + struct amdgpu_ring *ring = >ring; + + if (!ring->ready) + return -EINVAL; + + spin_lock_irqsave(>ring_lock, flags); + + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, + ref, mask); + amdgpu_fence_emit_polling(ring, ); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(>ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may +* block gpu_recover() routine forever, e.g. this virt_kiq_rreg +* is triggered in TTM and ttm_bo_lock_delayed_workqueue() will +* never return if we keep waiting in virt_kiq_rreg, which cause +* gpu_recover() hang there. +* +* also don't wait anymore for IRQ context +* */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq; + + might_sleep(); + + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq; + + return 0; + +failed_kiq: + pr_err("failed to invalidate tlb with kiq\n"); + return r; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -332,13 +384,19 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, /* Use register 17 for GART */ const unsigned eng = 17; unsigned i, j; - - spin_lock(>gmc.invalidate_lock); + int r; for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = >vmhub[i]; u32 tmp = gmc_v9_0_get_invalidate_req(vmid); + r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid); + if (!r) + continue; + + spin_lock(>gmc.invalidate_lock); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* Busy wait for ACK.*/ @@ -349,8 +407,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, break; cpu_relax(); } - if (j < 100) + if (j < 100) { + spin_unlock(>gmc.invalidate_lock); continue; + } /* Wait for ACK with a delay.*/ for (j = 0; j < adev->usec_timeout; j++) { @@ -360,13 +420,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct
[PATCH] drm/amdgpu: Remove duplicate code in gfx_v8_0.c
There are no any logical changes here. 1. if kcq can be enabled via kiq, we don't need to do kiq ring test. 2. amdgpu_ring_test_ring function can be used to sync the ring complete, remove the duplicate code. Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 80 ++- 1 file changed, 13 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 282dba6..680f9a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4604,7 +4604,6 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) { struct amdgpu_ring *kiq_ring = >gfx.kiq.ring; - uint32_t scratch, tmp = 0; uint64_t queue_mask = 0; int r, i; @@ -4623,17 +4622,10 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) queue_mask |= (1ull << i); } - r = amdgpu_gfx_scratch_get(adev, ); - if (r) { - DRM_ERROR("Failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - - r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); + kiq_ring->ready = true; + r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8); if (r) { DRM_ERROR("Failed to lock KIQ (%d).\n", r); - amdgpu_gfx_scratch_free(adev, scratch); return r; } /* set resources */ @@ -4665,25 +4657,12 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); } - /* write to scratch for completion */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); - amdgpu_ring_write(kiq_ring, 0xDEADBEEF); - amdgpu_ring_commit(kiq_ring); - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i >= adev->usec_timeout) { - DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); - r = -EINVAL; + r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + DRM_ERROR("KCQ enable failed\n"); + kiq_ring->ready = false; } - amdgpu_gfx_scratch_free(adev, scratch); - return r; } @@ -5014,15 +4993,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) if (r) goto done; - /* Test KIQ */ - ring = >gfx.kiq.ring; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; - } - /* Test KCQs */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = >gfx.compute_ring[i]; @@ -5092,23 +5062,11 @@ static int gfx_v8_0_hw_init(void *handle) static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring) { - struct amdgpu_device *adev = kiq_ring->adev; - uint32_t scratch, tmp = 0; - int r, i; - - r = amdgpu_gfx_scratch_get(adev, ); - if (r) { - DRM_ERROR("Failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); + int r; - r = amdgpu_ring_alloc(kiq_ring, 10); - if (r) { + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) DRM_ERROR("Failed to lock KIQ (%d).\n", r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } /* unmap queues */ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); @@ -5121,23 +5079,11 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0); - /* write to scratch for completion */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); - amdgpu_ring_write(kiq_ring, 0xDEADBEEF); - amdgpu_ring_commit(kiq_ring); - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i >= adev->usec_timeout) { - DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); - r = -EINVAL; - } -
Re: [PATCH] drm/amd/powerplay: set correct base for THM/NBIF/MP1 IP
On Fri, Aug 17, 2018 at 10:36:32AM +0800, Evan Quan wrote: > Set correct address base for vega20. > > Change-Id: I7435980e2ca156ee2b443a97899d40aaba4876cb > Signed-off-by: Evan Quan The patch subject prefix should be "drm/amdgpu:" With that fixed, patch is Reviewed-by: Huang Rui > --- > drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c > b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c > index 52778de93ab0..2d4473557b0d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c > +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c > @@ -38,6 +38,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev) > adev->reg_offset[ATHUB_HWIP][i] = (uint32_t > *)(&(ATHUB_BASE.instance[i])); > adev->reg_offset[NBIO_HWIP][i] = (uint32_t > *)(&(NBIO_BASE.instance[i])); > adev->reg_offset[MP0_HWIP][i] = (uint32_t > *)(&(MP0_BASE.instance[i])); > + adev->reg_offset[MP1_HWIP][i] = (uint32_t > *)(&(MP1_BASE.instance[i])); > adev->reg_offset[UVD_HWIP][i] = (uint32_t > *)(&(UVD_BASE.instance[i])); > adev->reg_offset[VCE_HWIP][i] = (uint32_t > *)(&(VCE_BASE.instance[i])); > adev->reg_offset[DF_HWIP][i] = (uint32_t > *)(&(DF_BASE.instance[i])); > @@ -46,6 +47,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev) > adev->reg_offset[SDMA0_HWIP][i] = (uint32_t > *)(&(SDMA0_BASE.instance[i])); > adev->reg_offset[SDMA1_HWIP][i] = (uint32_t > *)(&(SDMA1_BASE.instance[i])); > adev->reg_offset[SMUIO_HWIP][i] = (uint32_t > *)(&(SMUIO_BASE.instance[i])); > + adev->reg_offset[NBIF_HWIP][i] = (uint32_t > *)(&(NBIO_BASE.instance[i])); > + adev->reg_offset[THM_HWIP][i] = (uint32_t > *)(&(THM_BASE.instance[i])); > } > return 0; > } > -- > 2.18.0 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v2 libdrm] amdgpu: Eliminate void* arithmetic in amdgpu_find_bo_by_cpu_mapping
Reviewed-by: Junwei Zhang Regards, Jerry(Junwei Zhang) From: amd-gfx on behalf of Michel Dänzer Sent: Thursday, August 16, 2018 9:54:29 PM To: amd-gfx@lists.freedesktop.org Subject: [PATCH v2 libdrm] amdgpu: Eliminate void* arithmetic in amdgpu_find_bo_by_cpu_mapping From: Michel Dänzer Arithmetic using void* pointers isn't defined by the C standard, only as a GCC extension. Avoids compiler warnings: ../../amdgpu/amdgpu_bo.c: In function ‘amdgpu_find_bo_by_cpu_mapping’: ../../amdgpu/amdgpu_bo.c:554:48: warning: pointer of type ‘void *’ used in arithmetic [-Wpointer-arith] if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size)) ^ ../../amdgpu/amdgpu_bo.c:561:23: warning: pointer of type ‘void *’ used in subtraction [-Wpointer-arith] *offset_in_bo = cpu - bo->cpu_ptr; ^ v2: Use uintptr_t instead of char*, don't change function signature (Junwei Zhang) Fixes: 4d454424e1f2 ("amdgpu: add a function to find bo by cpu mapping (v2)") Signed-off-by: Michel Dänzer --- amdgpu/amdgpu_bo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c index 86d1c143..8efd014e 100644 --- a/amdgpu/amdgpu_bo.c +++ b/amdgpu/amdgpu_bo.c @@ -551,14 +551,15 @@ int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev, bo = handle_table_lookup(>bo_handles, i); if (!bo || !bo->cpu_ptr || size > bo->alloc_size) continue; - if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size)) + if (cpu >= bo->cpu_ptr && + cpu < (void*)((uintptr_t)bo->cpu_ptr + bo->alloc_size)) break; } if (i < dev->bo_handles.max_key) { atomic_inc(>refcount); *buf_handle = bo; - *offset_in_bo = cpu - bo->cpu_ptr; + *offset_in_bo = (uintptr_t)cpu - (uintptr_t)bo->cpu_ptr; } else { *buf_handle = NULL; *offset_in_bo = 0; -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/powerplay: set correct base for THM/NBIF/MP1 IP
Reviewed-by: Alex Deucher From: Evan Quan Sent: Thursday, August 16, 2018 10:36:32 PM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander; Xu, Feifei; Quan, Evan Subject: [PATCH] drm/amd/powerplay: set correct base for THM/NBIF/MP1 IP Set correct address base for vega20. Change-Id: I7435980e2ca156ee2b443a97899d40aaba4876cb Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 52778de93ab0..2d4473557b0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -38,6 +38,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i])); adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); @@ -46,6 +47,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); } return 0; } -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb
>-Original Message- >From: amd-gfx On Behalf Of Deng, >Emily >Sent: Friday, August 17, 2018 10:19 AM >To: Koenig, Christian ; amd- >g...@lists.freedesktop.org >Subject: RE: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb > >>-Original Message- >>From: Christian König >>Sent: Thursday, August 16, 2018 9:24 PM >>To: Deng, Emily ; amd-gfx@lists.freedesktop.org >>Subject: Re: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb >> >>Am 16.08.2018 um 15:05 schrieb Emily Deng: >>> To avoid the tlb flush not interrupted by world switch, use kiq and >>> one command to do tlb invalidate. >>> >>> Signed-off-by: Emily Deng >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++ >>> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 -- >>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 60 >> >>> 3 files changed, 64 insertions(+), 3 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> index 6265b88..19ef7711 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq { >>> AMDGPU_CP_KIQ_IRQ_LAST >>> }; >>> >>> +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ >>> +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ >>> +#define MAX_KIQ_REG_TRY 20 >>> + >>> int amdgpu_device_ip_set_clockgating_state(void *dev, >>>enum amd_ip_block_type block_type, >>>enum amd_clockgating_state state); >>diff --git >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >>> index 21adb1b6..3885636 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >>> @@ -22,9 +22,6 @@ >>>*/ >>> >>> #include "amdgpu.h" >>> -#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ >>> -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ >>> -#define MAX_KIQ_REG_TRY 20 >>> >>> uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) >>> { >>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>> index ed467de..6c164bd 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>> @@ -311,6 +311,58 @@ static uint32_t >>gmc_v9_0_get_invalidate_req(unsigned int vmid) >>> return req; >>> } >>> >>> +signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev, >>> + uint32_t reg0, uint32_t reg1, >>> + uint32_t ref, uint32_t mask) { >>> + signed long r, cnt = 0; >>> + unsigned long flags; >>> + uint32_t seq; >>> + struct amdgpu_kiq *kiq = >gfx.kiq; >>> + struct amdgpu_ring *ring = >ring; >>> + >>> + if (!ring->ready) >>> + return -EINVAL; >>> + >>> + spin_lock_irqsave(>ring_lock, flags); >>> + >>> + amdgpu_ring_alloc(ring, 32); >>> + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, >>> + ref, mask); >>> + amdgpu_fence_emit_polling(ring, ); >>> + amdgpu_ring_commit(ring); >>> + spin_unlock_irqrestore(>ring_lock, flags); >>> + >>> + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); >>> + >>> + /* don't wait anymore for gpu reset case because this way may >>> +* block gpu_recover() routine forever, e.g. this virt_kiq_rreg >>> +* is triggered in TTM and ttm_bo_lock_delayed_workqueue() will >>> +* never return if we keep waiting in virt_kiq_rreg, which cause >>> +* gpu_recover() hang there. >>> +* >>> +* also don't wait anymore for IRQ context >>> +* */ >>> + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) >>> + goto failed_kiq; >>> + >>> + might_sleep(); >>> + >>> + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { >>> + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); >>> + r = amdgpu_fence_wait_polling(ring, seq, >>MAX_KIQ_REG_WAIT); >>> + } >>> + >>> + if (cnt > MAX_KIQ_REG_TRY) >>> + goto failed_kiq; >>> + >>> + return 0; >>> + >>> +failed_kiq: >>> + pr_err("failed to invalidate tlb with kiq\n"); >>> + return r; >>> +} >>> + >>> /* >>>* GART >>>* VMID 0 is the physical GPU addresses as used by the kernel. >>> @@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct >>amdgpu_device *adev, >>> /* Use register 17 for GART */ >>> const unsigned eng = 17; >>> unsigned i, j; >>> + int r; >>> >>> spin_lock(>gmc.invalidate_lock); >>> >>> @@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct >>amdgpu_device *adev, >>> struct amdgpu_vmhub *hub = >vmhub[i]; >>> u32 tmp = gmc_v9_0_get_invalidate_req(vmid); >>> >>> + spin_unlock(>gmc.invalidate_lock); >> >>Well just remove the lock altogether. Taking it and dropping it again
[PATCH] drm/amd/powerplay: set correct base for THM/NBIF/MP1 IP
Set correct address base for vega20. Change-Id: I7435980e2ca156ee2b443a97899d40aaba4876cb Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 52778de93ab0..2d4473557b0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -38,6 +38,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i])); adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); @@ -46,6 +47,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); } return 0; } -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb
>-Original Message- >From: Christian König >Sent: Thursday, August 16, 2018 9:24 PM >To: Deng, Emily ; amd-gfx@lists.freedesktop.org >Subject: Re: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb > >Am 16.08.2018 um 15:05 schrieb Emily Deng: >> To avoid the tlb flush not interrupted by world switch, use kiq and >> one command to do tlb invalidate. >> >> Signed-off-by: Emily Deng >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 -- >> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 60 > >> 3 files changed, 64 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 6265b88..19ef7711 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq { >> AMDGPU_CP_KIQ_IRQ_LAST >> }; >> >> +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ >> +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ >> +#define MAX_KIQ_REG_TRY 20 >> + >> int amdgpu_device_ip_set_clockgating_state(void *dev, >> enum amd_ip_block_type block_type, >> enum amd_clockgating_state state); >diff --git >> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> index 21adb1b6..3885636 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> @@ -22,9 +22,6 @@ >>*/ >> >> #include "amdgpu.h" >> -#define MAX_KIQ_REG_WAIT5000 /* in usecs, 5ms */ >> -#define MAX_KIQ_REG_BAILOUT_INTERVAL5 /* in msecs, 5ms */ >> -#define MAX_KIQ_REG_TRY 20 >> >> uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) >> { >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> index ed467de..6c164bd 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> @@ -311,6 +311,58 @@ static uint32_t >gmc_v9_0_get_invalidate_req(unsigned int vmid) >> return req; >> } >> >> +signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev, >> + uint32_t reg0, uint32_t reg1, >> + uint32_t ref, uint32_t mask) >> +{ >> +signed long r, cnt = 0; >> +unsigned long flags; >> +uint32_t seq; >> +struct amdgpu_kiq *kiq = >gfx.kiq; >> +struct amdgpu_ring *ring = >ring; >> + >> +if (!ring->ready) >> +return -EINVAL; >> + >> +spin_lock_irqsave(>ring_lock, flags); >> + >> +amdgpu_ring_alloc(ring, 32); >> +amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, >> +ref, mask); >> +amdgpu_fence_emit_polling(ring, ); >> +amdgpu_ring_commit(ring); >> +spin_unlock_irqrestore(>ring_lock, flags); >> + >> +r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); >> + >> +/* don't wait anymore for gpu reset case because this way may >> + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg >> + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will >> + * never return if we keep waiting in virt_kiq_rreg, which cause >> + * gpu_recover() hang there. >> + * >> + * also don't wait anymore for IRQ context >> + * */ >> +if (r < 1 && (adev->in_gpu_reset || in_interrupt())) >> +goto failed_kiq; >> + >> +might_sleep(); >> + >> +while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { >> +msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); >> +r = amdgpu_fence_wait_polling(ring, seq, >MAX_KIQ_REG_WAIT); >> +} >> + >> +if (cnt > MAX_KIQ_REG_TRY) >> +goto failed_kiq; >> + >> +return 0; >> + >> +failed_kiq: >> +pr_err("failed to invalidate tlb with kiq\n"); >> +return r; >> +} >> + >> /* >>* GART >>* VMID 0 is the physical GPU addresses as used by the kernel. >> @@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct >amdgpu_device *adev, >> /* Use register 17 for GART */ >> const unsigned eng = 17; >> unsigned i, j; >> +int r; >> >> spin_lock(>gmc.invalidate_lock); >> >> @@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct >amdgpu_device *adev, >> struct amdgpu_vmhub *hub = >vmhub[i]; >> u32 tmp = gmc_v9_0_get_invalidate_req(vmid); >> >> +spin_unlock(>gmc.invalidate_lock); > >Well just remove the lock altogether. Taking it and dropping it again >immediately doesn't do anything useful. > >> +r = amdgpu_kiq_reg_write_reg_wait(adev, hub- >>vm_inv_eng0_req + eng, >> +hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid); >> +spin_lock(>gmc.invalidate_lock); >> +if (!r) >> +continue; >> + >>
RE: [PATCH 1/2] drm/amdgpu: Remove the sriov checking and add firmware checking
>-Original Message- >From: Christian König >Sent: Thursday, August 16, 2018 9:17 PM >To: Deng, Emily ; amd-gfx@lists.freedesktop.org >Subject: Re: [PATCH 1/2] drm/amdgpu: Remove the sriov checking and add >firmware checking > >Am 16.08.2018 um 15:05 schrieb Emily Deng: >> Unify bare metal and sriov, and add firmware checking for reg write >> and reg wait unify command. >> >> Signed-off-by: Emily Deng >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ >> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 47 >- >> 2 files changed, 48 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h >> index 53e9e2a..f172e92 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h >> @@ -274,6 +274,8 @@ struct amdgpu_gfx { >> uint32_trlc_srls_feature_version; >> uint32_tmec_feature_version; >> uint32_tmec2_feature_version; >> +boolmec_fw_write_wait; >> +boolme_fw_write_wait; >> struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; >> unsignednum_gfx_rings; >> struct amdgpu_ring > compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> index 76d979e..76e8973 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> @@ -482,6 +482,47 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct >amdgpu_device *adev) >> le32_to_cpu(rlc_hdr- >>reg_list_format_direct_reg_list_length); >> } >> >> +static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) >> +{ >> +adev->gfx.me_fw_write_wait = false; >> +adev->gfx.mec_fw_write_wait = false; >> + >> +switch (adev->asic_type) { >> +case CHIP_VEGA10: >> +if ((adev->gfx.me_fw_version >= 0x009c) && (adev- >>gfx.me_feature_version >= 42) && >> +(adev->gfx.pfp_fw_version >= 0x00b1) && (adev- >>gfx.pfp_feature_version >= 42)) >> +adev->gfx.me_fw_write_wait = true; > >Well that looks like the correct solution to me. Only a nit pick left to fix, >the >coding style looks a bit off. > >For example the "if" should be indented so that "(adev.." in both lines is on >the >same column: > >if ((adev ) && > (adev > >And the "adev->gfx.me_fw_write_wait = true;" is to far to the right. > Thanks, will modify as your suggestion. >With that fixed the patch is Acked-by: Christian König >. > >Regards, >Christian. > >> + >> +if ((adev->gfx.mec_fw_version >= 0x0193) && (adev- >>gfx.mec_feature_version >= 42)) >> +adev->gfx.mec_fw_write_wait = true; >> +break; >> +case CHIP_VEGA12: >> +if ((adev->gfx.me_fw_version >= 0x009c) && (adev- >>gfx.me_feature_version >= 44) && >> +(adev->gfx.pfp_fw_version >= 0x00b2) && (adev- >>gfx.pfp_feature_version >= 44)) >> +adev->gfx.me_fw_write_wait = true; >> + >> +if ((adev->gfx.mec_fw_version >= 0x0196) && (adev- >>gfx.mec_feature_version >= 44)) >> +adev->gfx.mec_fw_write_wait = true; >> +break; >> +case CHIP_VEGA20: >> +if ((adev->gfx.me_fw_version >= 0x009c) && (adev- >>gfx.me_feature_version >= 44) && >> +(adev->gfx.pfp_fw_version >= 0x00b2) && (adev- >>gfx.pfp_feature_version >= 44)) >> +adev->gfx.me_fw_write_wait = true; >> + >> +if ((adev->gfx.mec_fw_version >= 0x0197) && (adev- >>gfx.mec_feature_version >= 44)) >> +adev->gfx.mec_fw_write_wait = true; >> +break; >> +case CHIP_RAVEN: >> +if ((adev->gfx.me_fw_version >= 0x009c) && (adev- >>gfx.me_feature_version >= 42) && >> +(adev->gfx.pfp_fw_version >= 0x00b1) && (adev- >>gfx.pfp_feature_version >= 42)) >> +adev->gfx.me_fw_write_wait = true; >> + >> +if ((adev->gfx.mec_fw_version >= 0x0192) && (adev- >>gfx.mec_feature_version >= 42)) >> +adev->gfx.mec_fw_write_wait = true; >> +break; >> +} >> +} >> + >> static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) >> { >> const char *chip_name; >> @@ -716,6 +757,7 @@ static int gfx_v9_0_init_microcode(struct >amdgpu_device *adev) >> } >> >> out: >> +gfx_v9_0_check_fw_write_wait(adev); >> if (err) { >> dev_err(adev->dev, >> "gfx9: Failed to load firmware \"%s\"\n", @@ -4348,8 >+4390,11 @@ >> static void
[PATCH] drm/amdgpu/vega20: add missing IP reg offsets
Add additional IP block offsets. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 52778de93ab0..c231ae6f79e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -38,6 +38,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i])); adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); @@ -46,6 +47,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); } return 0; } -- 2.13.6 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] Revert "drm/amdgpu/display: Replace CONFIG_DRM_AMD_DC_DCN1_0 with CONFIG_X86"
On Thu, Aug 16, 2018 at 3:45 PM wrote: > > From: "Leo (Sunpeng) Li" > > This reverts commit 8624c3c4dbfe24fc6740687236a2e196f5f4bfb0. > > We need CONFIG_DRM_AMD_DC_DCN1_0 to guard code that is using fp math. MIssing your SOB. WIth that fixed, series is: Acked-by: Alex Deucher ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] [v3] drm: amd: dc: don't use FP math when Kcov is enabled
On Thu, Aug 16, 2018 at 9:56 PM Leo Li wrote: > On 2018-08-11 11:54 AM, Arnd Bergmann wrote: > > > > I tried implementing the two functions in KCOV: __sanitizer_cov_trace_cmpd > > and __sanitizer_cov_trace_cmpf, but that fails to build on architectures > > that do not support any floating-point functions, or would require making > > that code x86 specific as well. I also looked at what it would take to > > Hi Arnd, > > Is there a reason why we can't make __sanitizer_cov_trace_cmpd and > __sanitizer_cov_trace_cmpf X86 dependent? > > I sent out two patches to disable DCN1, but would prefer implementing > these two functions as opposed to disabling a component. I think it should be possible to implement them, perhaps not even hard to do it in an architecture independent way. I tried this at some point and couldn't figure it out, but I suppose it would fix the problem nicely. This would assume that the two functions can only ever be called from a context that already has access to the fpu, which I think is the case here. Arnd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amd/display: Don't build DCN1 when kcov is enabled
On Thu, Aug 16, 2018 at 9:45 PM wrote: > > From: "Leo (Sunpeng) Li" > > DCN1 contains code that utilizes fp math. When > CONFIG_KCOV_INSTRUMENT_ALL and CONFIG_KCOV_ENABLE_COMPARISONS are > enabled, build errors are found. See this earlier patch for details: > > https://lists.freedesktop.org/archives/dri-devel/2018-August/186131.html > > As a short term solution, disable CONFIG_DRM_AMD_DC_DCN1_0 when > KCOV_INSTRUMENT_ALL and KCOV_ENABLE_COMPARISONS are enabled. In > addition, make it a fully derived config, taking into account > CONFIG_X86. > > Signed-off-by: Leo (Sunpeng) Li Looks good to me, Acked-by: Arnd Bergmann ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH xf86-video-ati 5/5] Remove drmmode_crtc_private_rec::present_vblank_* related code
On Thu, Aug 16, 2018 at 12:20 PM Michel Dänzer wrote: > > From: Michel Dänzer > > Not needed anymore with the more robust mechanisms for preventing nested > drmHandleEvent calls introduced in the previous changes. > > (Ported from amdgpu commit 85cd8eef0cbed7b409b07f58d76dacd34aa3ddea) > > Signed-off-by: Michel Dänzer Series is: Acked-by: Alex Deucher > --- > src/drmmode_display.h | 8 > src/radeon_kms.c | 9 - > src/radeon_present.c | 34 +++--- > 3 files changed, 3 insertions(+), 48 deletions(-) > > diff --git a/src/drmmode_display.h b/src/drmmode_display.h > index a039bf8fb..bc66eda65 100644 > --- a/src/drmmode_display.h > +++ b/src/drmmode_display.h > @@ -111,14 +111,6 @@ typedef struct { > struct drmmode_fb *flip_pending; > /* The FB currently being scanned out by this CRTC, if any */ > struct drmmode_fb *fb; > - > -#ifdef HAVE_PRESENT_H > -/* Deferred processing of Present vblank event */ > -uint64_t present_vblank_event_id; > -uint64_t present_vblank_usec; > -unsigned present_vblank_msc; > -Bool present_flip_expected; > -#endif > } drmmode_crtc_private_rec, *drmmode_crtc_private_ptr; > > typedef struct { > diff --git a/src/radeon_kms.c b/src/radeon_kms.c > index 809d24469..a24776811 100644 > --- a/src/radeon_kms.c > +++ b/src/radeon_kms.c > @@ -541,15 +541,6 @@ radeon_scanout_flip_handler(xf86CrtcPtr crtc, uint32_t > msc, uint64_t usec, > drmmode_fb_reference(pRADEONEnt->fd, _crtc->fb, > drmmode_crtc->flip_pending); > radeon_scanout_flip_abort(crtc, event_data); > - > -#ifdef HAVE_PRESENT_H > -if (drmmode_crtc->present_vblank_event_id) { > - present_event_notify(drmmode_crtc->present_vblank_event_id, > -drmmode_crtc->present_vblank_usec, > -drmmode_crtc->present_vblank_msc); > - drmmode_crtc->present_vblank_event_id = 0; > -} > -#endif > } > > > diff --git a/src/radeon_present.c b/src/radeon_present.c > index ffc14a0e8..d0a0c68ca 100644 > --- a/src/radeon_present.c > +++ b/src/radeon_present.c > @@ -52,7 +52,6 @@ static present_screen_info_rec radeon_present_screen_info; > > struct radeon_present_vblank_event { > uint64_t event_id; > -Bool vblank_for_flip; > Bool unflip; > }; > > @@ -120,26 +119,9 @@ static void > radeon_present_vblank_handler(xf86CrtcPtr crtc, unsigned int msc, > uint64_t usec, void *data) > { > -drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; > struct radeon_present_vblank_event *event = data; > > -if (event->vblank_for_flip && > - drmmode_crtc->tear_free && > - drmmode_crtc->scanout_update_pending) { > - if (drmmode_crtc->present_vblank_event_id != 0) { > - xf86DrvMsg(crtc->scrn->scrnIndex, X_WARNING, > - "Need to handle previously deferred vblank event\n"); > - present_event_notify(drmmode_crtc->present_vblank_event_id, > -drmmode_crtc->present_vblank_usec, > -drmmode_crtc->present_vblank_msc); > - } > - > - drmmode_crtc->present_vblank_event_id = event->event_id; > - drmmode_crtc->present_vblank_msc = msc; > - drmmode_crtc->present_vblank_usec = usec; > -} else > - present_event_notify(event->event_id, usec, msc); > - > +present_event_notify(event->event_id, usec, msc); > free(event); > } > > @@ -162,7 +144,6 @@ static int > radeon_present_queue_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc) > { > xf86CrtcPtr xf86_crtc = crtc->devPrivate; > -drmmode_crtc_private_ptr drmmode_crtc = xf86_crtc->driver_private; > ScreenPtr screen = crtc->pScreen; > struct radeon_present_vblank_event *event; > uintptr_t drm_queue_seq; > @@ -171,8 +152,6 @@ radeon_present_queue_vblank(RRCrtcPtr crtc, uint64_t > event_id, uint64_t msc) > if (!event) > return BadAlloc; > event->event_id = event_id; > -event->vblank_for_flip = drmmode_crtc->present_flip_expected; > -drmmode_crtc->present_flip_expected = FALSE; > > drm_queue_seq = radeon_drm_queue_alloc(xf86_crtc, >RADEON_DRM_QUEUE_CLIENT_DEFAULT, > @@ -272,7 +251,6 @@ radeon_present_check_flip(RRCrtcPtr crtc, WindowPtr > window, PixmapPtr pixmap, > Bool sync_flip) > { > xf86CrtcPtr xf86_crtc = crtc->devPrivate; > -drmmode_crtc_private_ptr drmmode_crtc = xf86_crtc->driver_private; > ScreenPtr screen = window->drawable.pScreen; > ScrnInfoPtr scrn = xf86_crtc->scrn; > xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); > @@ -281,8 +259,6 @@ radeon_present_check_flip(RRCrtcPtr crtc, WindowPtr > window, PixmapPtr pixmap, > int num_crtcs_on; > int i; > > -drmmode_crtc->present_flip_expected = FALSE; > - > if (!scrn->vtSema) > return
[PATCH] drm/amdgpu/display: disable eDP fast boot optimization on DCE8
Seems to cause blank screens. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=106940 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 350ee3e3e34d..2e3f85ceeaa9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1562,7 +1562,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) bool can_eDP_fast_boot_optimize = false; if (edp_link) { - can_eDP_fast_boot_optimize = + /* this seems to cause blank screens on DCE8 */ + if ((dc->ctx->dce_version == DCE_VERSION_8_0) || + (dc->ctx->dce_version == DCE_VERSION_8_1) || + (dc->ctx->dce_version == DCE_VERSION_8_3)) + can_eDP_fast_boot_optimize = false; + else + can_eDP_fast_boot_optimize = edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc); } -- 2.13.6 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amd/display: Don't build DCN1 when kcov is enabled
From: "Leo (Sunpeng) Li" DCN1 contains code that utilizes fp math. When CONFIG_KCOV_INSTRUMENT_ALL and CONFIG_KCOV_ENABLE_COMPARISONS are enabled, build errors are found. See this earlier patch for details: https://lists.freedesktop.org/archives/dri-devel/2018-August/186131.html As a short term solution, disable CONFIG_DRM_AMD_DC_DCN1_0 when KCOV_INSTRUMENT_ALL and KCOV_ENABLE_COMPARISONS are enabled. In addition, make it a fully derived config, taking into account CONFIG_X86. Signed-off-by: Leo (Sunpeng) Li --- drivers/gpu/drm/amd/display/Kconfig | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 4c35625..ed654a7 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -4,18 +4,16 @@ menu "Display Engine Configuration" config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y + select DRM_AMD_DC_DCN1_0 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and Raven ASICs. config DRM_AMD_DC_DCN1_0 - bool "DCN 1.0 Raven family" - depends on DRM_AMD_DC && X86 - default y + def_bool n help - Choose this option if you want to have - RV family for display engine + RV family support for display engine config DEBUG_KERNEL_DC bool "Enable kgdb break in DC" -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence
Am 16.08.2018 um 20:43 schrieb Felix Kuehling: On 2018-08-16 02:26 PM, Christian König wrote: Am 16.08.2018 um 20:23 schrieb Felix Kuehling: On 2018-08-16 02:18 PM, Christian König wrote: Am 16.08.2018 um 18:50 schrieb Felix Kuehling: On 2018-08-16 02:43 AM, Christian König wrote: [SNIP] I mean it could be that in the worst case we race and stop a KFD process for no good reason. Right. For a more practical example, a KFD BO can get evicted just before the application decides to unmap it. The preemption happens asynchronously, handled by an SDMA job in the GPU scheduler. That job will have an amdgpu_sync object with the eviction fence in it. While that SDMA job is pending or in progress, the application decides to unmap the BO. That removes the eviction fence from that BO's reservation. But it can't remove the fence from all the sync objects that were previously created and are still in flight. So the preemption will be triggered, and the fence will eventually signal when the KFD preemption is complete. I don't think that's something we can prevent. The worst case is that a preemption happens unnecessarily if an eviction gets triggered just before removing the fence. But removing the fence will prevent future evictions of the BO from triggering a KFD process preemption. That's the best we can do. It's true that you can't drop the SDMA job which wants to evict the BO, but at this time the fence signaling is already underway and not stoppable anymore. Replacing the fence with a new one would just be much more cleaner and fix quite a bunch of corner cases where the KFD process would be preempted without good reason. Replacing the fence cleanly probably also involves a preemption, so you don't gain anything. Mhm, why that? My idea would be to create a new fence, replace the old one with the new one and then manually signal the old one. So why should there be a preemption triggered here? Right maybe you can do this without triggering a preemption, but it would require a bunch of new code. Currently the only thing that replaces an old eviction fence with a new one is a preemption+restore. You'll need to replace the fence in all BOs belonging to the process. Since removing a fence is something you don't want to do, that really means adding a new fence, and leaving the old fence in place until it signals. In the mean time you have two eviction fences active for this process, and if either one gets triggered, you'll get a preemption. Only after all BOs have the new eviction fence, you can disarm the old eviction fence and signal it (without triggering a preemption). The way I see it, this adds a bunch of CPU overhead (depending on the number of BOs in the process), and increases the likelihood of unnecessary preemptions, because it takes that much longer for the operation to complete. Yeah, the CPU overhead is indeed a really good point. Talking about overhead, imagine a process cleaning up at process termination, which unmaps and frees all BOs. Each BO that gets freed replaces the eviction fence on all remaining BOs. If you have N BOs, you'll end up creating N new eviction fences in the process. The overhead scales with O(N²) of the number of BOs in the process. Well as I said that is the only case where I think replacing fences is unproblematic. E.g. when a BO is freed we already have a mechanism to copy all fences to a local reservation object (to prevent that any new fences can be added to the BO if it shares it's reservation object). At this time it should be really easy to filter out BOs you don't want to wait for when the BO is destructed. But anyway, when you can live with the possibility that a stale fence triggers a process preemption than there isn't much point in cleaning this up. Regards, Christian. Regards, Felix Christian. Regards, Felix It's probably quite a bit of more CPU overhead of doing so, but I think that this would still be the more fail prove option. Regards, Christian. Regards, Felix ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence
On 2018-08-16 02:26 PM, Christian König wrote: > Am 16.08.2018 um 20:23 schrieb Felix Kuehling: >> On 2018-08-16 02:18 PM, Christian König wrote: >>> Am 16.08.2018 um 18:50 schrieb Felix Kuehling: On 2018-08-16 02:43 AM, Christian König wrote: [SNIP] > I mean it could be that in the worst case we race and stop a KFD > process for no good reason. Right. For a more practical example, a KFD BO can get evicted just before the application decides to unmap it. The preemption happens asynchronously, handled by an SDMA job in the GPU scheduler. That job will have an amdgpu_sync object with the eviction fence in it. While that SDMA job is pending or in progress, the application decides to unmap the BO. That removes the eviction fence from that BO's reservation. But it can't remove the fence from all the sync objects that were previously created and are still in flight. So the preemption will be triggered, and the fence will eventually signal when the KFD preemption is complete. I don't think that's something we can prevent. The worst case is that a preemption happens unnecessarily if an eviction gets triggered just before removing the fence. But removing the fence will prevent future evictions of the BO from triggering a KFD process preemption. That's the best we can do. >>> It's true that you can't drop the SDMA job which wants to evict the >>> BO, but at this time the fence signaling is already underway and not >>> stoppable anymore. >>> >>> Replacing the fence with a new one would just be much more cleaner and >>> fix quite a bunch of corner cases where the KFD process would be >>> preempted without good reason. >> Replacing the fence cleanly probably also involves a preemption, so you >> don't gain anything. > > Mhm, why that? > > My idea would be to create a new fence, replace the old one with the > new one and then manually signal the old one. > > So why should there be a preemption triggered here? Right maybe you can do this without triggering a preemption, but it would require a bunch of new code. Currently the only thing that replaces an old eviction fence with a new one is a preemption+restore. You'll need to replace the fence in all BOs belonging to the process. Since removing a fence is something you don't want to do, that really means adding a new fence, and leaving the old fence in place until it signals. In the mean time you have two eviction fences active for this process, and if either one gets triggered, you'll get a preemption. Only after all BOs have the new eviction fence, you can disarm the old eviction fence and signal it (without triggering a preemption). The way I see it, this adds a bunch of CPU overhead (depending on the number of BOs in the process), and increases the likelihood of unnecessary preemptions, because it takes that much longer for the operation to complete. Talking about overhead, imagine a process cleaning up at process termination, which unmaps and frees all BOs. Each BO that gets freed replaces the eviction fence on all remaining BOs. If you have N BOs, you'll end up creating N new eviction fences in the process. The overhead scales with O(N²) of the number of BOs in the process. Regards, Felix > > Christian. > >> >> Regards, >> Felix >> >>> It's probably quite a bit of more CPU overhead of doing so, but I >>> think that this would still be the more fail prove option. >>> >>> Regards, >>> Christian. >>> >>> Regards, Felix > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence
Am 16.08.2018 um 20:23 schrieb Felix Kuehling: On 2018-08-16 02:18 PM, Christian König wrote: Am 16.08.2018 um 18:50 schrieb Felix Kuehling: On 2018-08-16 02:43 AM, Christian König wrote: [SNIP] I mean it could be that in the worst case we race and stop a KFD process for no good reason. Right. For a more practical example, a KFD BO can get evicted just before the application decides to unmap it. The preemption happens asynchronously, handled by an SDMA job in the GPU scheduler. That job will have an amdgpu_sync object with the eviction fence in it. While that SDMA job is pending or in progress, the application decides to unmap the BO. That removes the eviction fence from that BO's reservation. But it can't remove the fence from all the sync objects that were previously created and are still in flight. So the preemption will be triggered, and the fence will eventually signal when the KFD preemption is complete. I don't think that's something we can prevent. The worst case is that a preemption happens unnecessarily if an eviction gets triggered just before removing the fence. But removing the fence will prevent future evictions of the BO from triggering a KFD process preemption. That's the best we can do. It's true that you can't drop the SDMA job which wants to evict the BO, but at this time the fence signaling is already underway and not stoppable anymore. Replacing the fence with a new one would just be much more cleaner and fix quite a bunch of corner cases where the KFD process would be preempted without good reason. Replacing the fence cleanly probably also involves a preemption, so you don't gain anything. Mhm, why that? My idea would be to create a new fence, replace the old one with the new one and then manually signal the old one. So why should there be a preemption triggered here? Christian. Regards, Felix It's probably quite a bit of more CPU overhead of doing so, but I think that this would still be the more fail prove option. Regards, Christian. Regards, Felix ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence
On 2018-08-16 02:18 PM, Christian König wrote: > Am 16.08.2018 um 18:50 schrieb Felix Kuehling: >> On 2018-08-16 02:43 AM, Christian König wrote: >> [SNIP] >>> I mean it could be that in the worst case we race and stop a KFD >>> process for no good reason. >> Right. For a more practical example, a KFD BO can get evicted just >> before the application decides to unmap it. The preemption happens >> asynchronously, handled by an SDMA job in the GPU scheduler. That job >> will have an amdgpu_sync object with the eviction fence in it. >> >> While that SDMA job is pending or in progress, the application decides >> to unmap the BO. That removes the eviction fence from that BO's >> reservation. But it can't remove the fence from all the sync objects >> that were previously created and are still in flight. So the preemption >> will be triggered, and the fence will eventually signal when the KFD >> preemption is complete. >> >> I don't think that's something we can prevent. The worst case is that a >> preemption happens unnecessarily if an eviction gets triggered just >> before removing the fence. But removing the fence will prevent future >> evictions of the BO from triggering a KFD process preemption. That's the >> best we can do. > > It's true that you can't drop the SDMA job which wants to evict the > BO, but at this time the fence signaling is already underway and not > stoppable anymore. > > Replacing the fence with a new one would just be much more cleaner and > fix quite a bunch of corner cases where the KFD process would be > preempted without good reason. Replacing the fence cleanly probably also involves a preemption, so you don't gain anything. Regards, Felix > > It's probably quite a bit of more CPU overhead of doing so, but I > think that this would still be the more fail prove option. > > Regards, > Christian. > > >> >> Regards, >> Felix >> > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v2 libdrm] amdgpu: Eliminate void* arithmetic in amdgpu_find_bo_by_cpu_mapping
Am 16.08.2018 um 15:54 schrieb Michel Dänzer: From: Michel Dänzer Arithmetic using void* pointers isn't defined by the C standard, only as a GCC extension. Avoids compiler warnings: ../../amdgpu/amdgpu_bo.c: In function ‘amdgpu_find_bo_by_cpu_mapping’: ../../amdgpu/amdgpu_bo.c:554:48: warning: pointer of type ‘void *’ used in arithmetic [-Wpointer-arith] if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size)) ^ ../../amdgpu/amdgpu_bo.c:561:23: warning: pointer of type ‘void *’ used in subtraction [-Wpointer-arith] *offset_in_bo = cpu - bo->cpu_ptr; ^ v2: Use uintptr_t instead of char*, don't change function signature (Junwei Zhang) Fixes: 4d454424e1f2 ("amdgpu: add a function to find bo by cpu mapping (v2)") Signed-off-by: Michel Dänzer Reviewed-by: Christian König --- amdgpu/amdgpu_bo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c index 86d1c143..8efd014e 100644 --- a/amdgpu/amdgpu_bo.c +++ b/amdgpu/amdgpu_bo.c @@ -551,14 +551,15 @@ int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev, bo = handle_table_lookup(>bo_handles, i); if (!bo || !bo->cpu_ptr || size > bo->alloc_size) continue; - if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size)) + if (cpu >= bo->cpu_ptr && + cpu < (void*)((uintptr_t)bo->cpu_ptr + bo->alloc_size)) break; } if (i < dev->bo_handles.max_key) { atomic_inc(>refcount); *buf_handle = bo; - *offset_in_bo = cpu - bo->cpu_ptr; + *offset_in_bo = (uintptr_t)cpu - (uintptr_t)bo->cpu_ptr; } else { *buf_handle = NULL; *offset_in_bo = 0; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence
Am 16.08.2018 um 18:50 schrieb Felix Kuehling: On 2018-08-16 02:43 AM, Christian König wrote: [SNIP] I mean it could be that in the worst case we race and stop a KFD process for no good reason. Right. For a more practical example, a KFD BO can get evicted just before the application decides to unmap it. The preemption happens asynchronously, handled by an SDMA job in the GPU scheduler. That job will have an amdgpu_sync object with the eviction fence in it. While that SDMA job is pending or in progress, the application decides to unmap the BO. That removes the eviction fence from that BO's reservation. But it can't remove the fence from all the sync objects that were previously created and are still in flight. So the preemption will be triggered, and the fence will eventually signal when the KFD preemption is complete. I don't think that's something we can prevent. The worst case is that a preemption happens unnecessarily if an eviction gets triggered just before removing the fence. But removing the fence will prevent future evictions of the BO from triggering a KFD process preemption. That's the best we can do. It's true that you can't drop the SDMA job which wants to evict the BO, but at this time the fence signaling is already underway and not stoppable anymore. Replacing the fence with a new one would just be much more cleaner and fix quite a bunch of corner cases where the KFD process would be preempted without good reason. It's probably quite a bit of more CPU overhead of doing so, but I think that this would still be the more fail prove option. Regards, Christian. Regards, Felix ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[pull] amdgpu drm-next-4.19
Hi Dave, Fixes for 4.19: - Add VCN PSP FW loading for RV (this is required on upcoming parts) - Fix scheduler setup ordering for VCE and UVD - Few misc display fixes The following changes since commit 557ce95051c8eff67af48612ab350d8408aa0541: Merge branch 'drm-next-4.19' of git://people.freedesktop.org/~agd5f/linux into drm-next (2018-08-10 11:43:02 +1000) are available in the git repository at: git://people.freedesktop.org/~agd5f/linux drm-next-4.19 for you to fetch changes up to c9533d1bca3393fbdfe5b24ccb4cfe6a1a02d794: drm/amdgpu: Use kvmalloc for allocating UVD/VCE/VCN BO backup memory (2018-08-16 12:59:11 -0500) Charlene Liu (1): drm/amd/display: fix single link DVI has no display Emily Deng (2): drm/amdgpu/uvd: UVD entity initialization relys on ring initialization drm/amdgpu/vce: VCE entity initialization relies on ring initializtion James Zhu (2): drm/amdgpu:add tmr mc address into amdgpu_firmware_info drm/amdgpu: update tmr mc address Jerry (Fangzhi) Zuo (1): drm/amd/display: Fix warning observed in mode change on Vega Likun Gao (3): drm/amdgpu:add new firmware id for VCN drm/amdgpu:add VCN support in PSP driver drm/amdgpu:add VCN booting with firmware loaded by PSP Michel Dänzer (1): drm/amdgpu: Use kvmalloc for allocating UVD/VCE/VCN BO backup memory Mikita Lipski (3): drm/amd/display: Allow clock sharing b/w HDMI and DVI drm/amd/display: Check if clock source in use before disabling drm/amd/display: Pass connector id when executing VBIOS CT Nicholas Kazlauskas (1): drm/amd/display: Guard against null crtc in CRC IRQ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 5 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c| 38 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c| 33 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c| 23 +- drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 3 ++ drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 4 ++ drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 4 ++ drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 2 + drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 5 +++ drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 2 + drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2 + drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 10 - drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 40 + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 10 - drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 + drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 50 +++--- .../gpu/drm/amd/display/dc/dce/dce_link_encoder.c | 4 +- .../amd/display/dc/dce110/dce110_hw_sequencer.c| 4 +- .../display/dc/dce120/dce120_timing_generator.c| 2 +- drivers/gpu/drm/amd/display/dc/inc/resource.h | 5 +++ 23 files changed, 189 insertions(+), 65 deletions(-) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence
On 2018-08-16 02:43 AM, Christian König wrote: > Am 15.08.2018 um 20:49 schrieb Felix Kuehling: >> On 2018-08-15 02:27 PM, Christian König wrote: >>> Am 15.08.2018 um 20:17 schrieb Felix Kuehling: On 2018-08-15 03:02 AM, Christian König wrote: > Hi Felix, > > yeah, you pretty much nailed it. > > The problem is that the array itself is RCU protected. This means > that > you can only copy the whole structure when you want to update it. > > The exception is reservation_object_add_shared() which only works > because we replace an either signaled fence or a fence within the > same > context but a later sequence number. > > This also explains why this is only a band aid and the whole approach > of removing fences doesn't work in general. At any time somebody > could > have taken an RCU reference to the old array, created a copy of it > and > is now still using this one. > > The only 100% correct solution would be to mark the existing fence as > signaled and replace it everywhere else. Depends what you're trying to achieve. I think the problem you see is, that some reader may still be using the old reservation_object_list copy with the fence still in it. But, the remaining lifetime of the reservation_object_list copy is limited. If we wanted to be sure no more copies with the old fence exist, all we'd need to do is call synchronize_rcu. Maybe we need to do that before releasing the fence references, or release the fence reference in an RCU callback to be safe. >>> The assumption that the fence would die with the array is what is >>> incorrect here. >> I'm making no such assumption. The point is not to destroy the fence. >> Only to remove the fence reference from the reservation object. That is, >> we want any BO with this reservation object to stop checking or waiting >> for our eviction fence. > > Maybe I'm overthinking this, but let me explain the point once more. > > See reservation_object_wait_timeout_rcu() for an example of the > problem I see here: >> seq = read_seqcount_begin(>seq); >> rcu_read_lock(); > >> if (!dma_fence_get_rcu(lfence)) >> goto unlock_retry; > ... >> rcu_read_unlock(); > ... >> if (read_seqcount_retry(>seq, seq)) { >> dma_fence_put(fence); >> goto retry; >> } > ... >> ret = dma_fence_wait_timeout(fence, intr, ret); > > In other words the RCU mechanism guarantees that we also wait for > newly added fences, but it does not guarantee that we don't wait for a > fence which is already removed. I understand that. > >>> The lifetime of RCUed array object is limit, but there is absolutely >>> no guarantee that somebody didn't made a copy of the fences. >>> >>> E.g. somebody could have called reservation_object_get_fences_rcu(), >>> reservation_object_copy_fences() or a concurrent >>> reservation_object_wait_timeout_rcu() is underway. >> That's fine. Then there will be additional fence references. When we >> remove a fence from a reservation object, we don't know and don't care >> who else is holding a reference to the fence anyway. This is no >> different. > > So the KFD implementation doesn't care that the fence is removed from > a BO but somebody could still start to wait on it because of the BO? > > I mean it could be that in the worst case we race and stop a KFD > process for no good reason. Right. For a more practical example, a KFD BO can get evicted just before the application decides to unmap it. The preemption happens asynchronously, handled by an SDMA job in the GPU scheduler. That job will have an amdgpu_sync object with the eviction fence in it. While that SDMA job is pending or in progress, the application decides to unmap the BO. That removes the eviction fence from that BO's reservation. But it can't remove the fence from all the sync objects that were previously created and are still in flight. So the preemption will be triggered, and the fence will eventually signal when the KFD preemption is complete. I don't think that's something we can prevent. The worst case is that a preemption happens unnecessarily if an eviction gets triggered just before removing the fence. But removing the fence will prevent future evictions of the BO from triggering a KFD process preemption. That's the best we can do. Regards, Felix > > Regards, > Christian. > >> >> Regards, >> Felix >> >>> That's also the reason why fences live for much longer than their >>> signaling, e.g. somebody can have a reference to the fence object even >>> hours after it is signaled. >>> >>> Regards, >>> Christian. >>> Regards, Felix > Going to fix the copy error I made with the sequence number and > send it out again. > > Regards, > Christian. > >
Re: [PATCH xf86-video-ati] Use correct FB handle in radeon_do_pageflip
On Thu, Aug 16, 2018 at 12:25 PM Michel Dänzer wrote: > > From: Michel Dänzer > > We were always using the handle of the client provided FB, which > prevented RandR transforms from working, and could result in a black > screen. > > Fixes: 740f0850f1e4 "Store FB for each CRTC in drmmode_flipdata_rec" > (Ported from amdgpu commit f6cd72e64e85896b6d155bee0930e59771dcb701) > > Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher > --- > src/drmmode_display.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/src/drmmode_display.c b/src/drmmode_display.c > index 0b92b70c6..68d6254d7 100644 > --- a/src/drmmode_display.c > +++ b/src/drmmode_display.c > @@ -3386,7 +3386,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr > client, > if (crtc == ref_crtc) { > if (drmmode_page_flip_target_absolute(pRADEONEnt, > drmmode_crtc, > - fb->handle, > + > flipdata->fb[i]->handle, > flip_flags, > drm_queue_seq, > target_msc) != > 0) > @@ -3394,7 +3394,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr > client, > } else { > if (drmmode_page_flip_target_relative(pRADEONEnt, > drmmode_crtc, > - fb->handle, > + > flipdata->fb[i]->handle, > flip_flags, > drm_queue_seq, > 0) != 0) > goto flip_error; > -- > 2.18.0 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH xf86-video-ati] Use correct FB handle in radeon_do_pageflip
From: Michel Dänzer We were always using the handle of the client provided FB, which prevented RandR transforms from working, and could result in a black screen. Fixes: 740f0850f1e4 "Store FB for each CRTC in drmmode_flipdata_rec" (Ported from amdgpu commit f6cd72e64e85896b6d155bee0930e59771dcb701) Signed-off-by: Michel Dänzer --- src/drmmode_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/drmmode_display.c b/src/drmmode_display.c index 0b92b70c6..68d6254d7 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -3386,7 +3386,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr client, if (crtc == ref_crtc) { if (drmmode_page_flip_target_absolute(pRADEONEnt, drmmode_crtc, - fb->handle, + flipdata->fb[i]->handle, flip_flags, drm_queue_seq, target_msc) != 0) @@ -3394,7 +3394,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr client, } else { if (drmmode_page_flip_target_relative(pRADEONEnt, drmmode_crtc, - fb->handle, + flipdata->fb[i]->handle, flip_flags, drm_queue_seq, 0) != 0) goto flip_error; -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH xf86-video-ati 5/5] Remove drmmode_crtc_private_rec::present_vblank_* related code
From: Michel Dänzer Not needed anymore with the more robust mechanisms for preventing nested drmHandleEvent calls introduced in the previous changes. (Ported from amdgpu commit 85cd8eef0cbed7b409b07f58d76dacd34aa3ddea) Signed-off-by: Michel Dänzer --- src/drmmode_display.h | 8 src/radeon_kms.c | 9 - src/radeon_present.c | 34 +++--- 3 files changed, 3 insertions(+), 48 deletions(-) diff --git a/src/drmmode_display.h b/src/drmmode_display.h index a039bf8fb..bc66eda65 100644 --- a/src/drmmode_display.h +++ b/src/drmmode_display.h @@ -111,14 +111,6 @@ typedef struct { struct drmmode_fb *flip_pending; /* The FB currently being scanned out by this CRTC, if any */ struct drmmode_fb *fb; - -#ifdef HAVE_PRESENT_H -/* Deferred processing of Present vblank event */ -uint64_t present_vblank_event_id; -uint64_t present_vblank_usec; -unsigned present_vblank_msc; -Bool present_flip_expected; -#endif } drmmode_crtc_private_rec, *drmmode_crtc_private_ptr; typedef struct { diff --git a/src/radeon_kms.c b/src/radeon_kms.c index 809d24469..a24776811 100644 --- a/src/radeon_kms.c +++ b/src/radeon_kms.c @@ -541,15 +541,6 @@ radeon_scanout_flip_handler(xf86CrtcPtr crtc, uint32_t msc, uint64_t usec, drmmode_fb_reference(pRADEONEnt->fd, _crtc->fb, drmmode_crtc->flip_pending); radeon_scanout_flip_abort(crtc, event_data); - -#ifdef HAVE_PRESENT_H -if (drmmode_crtc->present_vblank_event_id) { - present_event_notify(drmmode_crtc->present_vblank_event_id, -drmmode_crtc->present_vblank_usec, -drmmode_crtc->present_vblank_msc); - drmmode_crtc->present_vblank_event_id = 0; -} -#endif } diff --git a/src/radeon_present.c b/src/radeon_present.c index ffc14a0e8..d0a0c68ca 100644 --- a/src/radeon_present.c +++ b/src/radeon_present.c @@ -52,7 +52,6 @@ static present_screen_info_rec radeon_present_screen_info; struct radeon_present_vblank_event { uint64_t event_id; -Bool vblank_for_flip; Bool unflip; }; @@ -120,26 +119,9 @@ static void radeon_present_vblank_handler(xf86CrtcPtr crtc, unsigned int msc, uint64_t usec, void *data) { -drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; struct radeon_present_vblank_event *event = data; -if (event->vblank_for_flip && - drmmode_crtc->tear_free && - drmmode_crtc->scanout_update_pending) { - if (drmmode_crtc->present_vblank_event_id != 0) { - xf86DrvMsg(crtc->scrn->scrnIndex, X_WARNING, - "Need to handle previously deferred vblank event\n"); - present_event_notify(drmmode_crtc->present_vblank_event_id, -drmmode_crtc->present_vblank_usec, -drmmode_crtc->present_vblank_msc); - } - - drmmode_crtc->present_vblank_event_id = event->event_id; - drmmode_crtc->present_vblank_msc = msc; - drmmode_crtc->present_vblank_usec = usec; -} else - present_event_notify(event->event_id, usec, msc); - +present_event_notify(event->event_id, usec, msc); free(event); } @@ -162,7 +144,6 @@ static int radeon_present_queue_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc) { xf86CrtcPtr xf86_crtc = crtc->devPrivate; -drmmode_crtc_private_ptr drmmode_crtc = xf86_crtc->driver_private; ScreenPtr screen = crtc->pScreen; struct radeon_present_vblank_event *event; uintptr_t drm_queue_seq; @@ -171,8 +152,6 @@ radeon_present_queue_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc) if (!event) return BadAlloc; event->event_id = event_id; -event->vblank_for_flip = drmmode_crtc->present_flip_expected; -drmmode_crtc->present_flip_expected = FALSE; drm_queue_seq = radeon_drm_queue_alloc(xf86_crtc, RADEON_DRM_QUEUE_CLIENT_DEFAULT, @@ -272,7 +251,6 @@ radeon_present_check_flip(RRCrtcPtr crtc, WindowPtr window, PixmapPtr pixmap, Bool sync_flip) { xf86CrtcPtr xf86_crtc = crtc->devPrivate; -drmmode_crtc_private_ptr drmmode_crtc = xf86_crtc->driver_private; ScreenPtr screen = window->drawable.pScreen; ScrnInfoPtr scrn = xf86_crtc->scrn; xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); @@ -281,8 +259,6 @@ radeon_present_check_flip(RRCrtcPtr crtc, WindowPtr window, PixmapPtr pixmap, int num_crtcs_on; int i; -drmmode_crtc->present_flip_expected = FALSE; - if (!scrn->vtSema) return FALSE; @@ -313,7 +289,6 @@ radeon_present_check_flip(RRCrtcPtr crtc, WindowPtr window, PixmapPtr pixmap, if (num_crtcs_on == 0) return FALSE; -drmmode_crtc->present_flip_expected = TRUE; return TRUE; } @@ -354,7 +329,6 @@ radeon_present_flip(RRCrtcPtr crtc, uint64_t event_id, uint64_t target_msc,
[PATCH xf86-video-ati 1/5] Move DRM event queue related initialization to radeon_drm_queue_init
From: Michel Dänzer And make radeon_drm_queue_handler not directly accessible outside of radeon_drm_queue.c. (Ported from amdgpu commit 0148283984c77f7a6e97026edc3093497547e0a4) Signed-off-by: Michel Dänzer --- src/drmmode_display.c | 4 src/radeon_dri2.c | 14 -- src/radeon_drm_queue.c | 11 +-- src/radeon_drm_queue.h | 5 + src/radeon_kms.c | 2 +- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/drmmode_display.c b/src/drmmode_display.c index 0b92b70c6..2e8f91d6e 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -2742,10 +2742,6 @@ Bool drmmode_pre_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int cpp) xf86InitialConfiguration(pScrn, TRUE); - drmmode->event_context.version = 2; - drmmode->event_context.vblank_handler = radeon_drm_queue_handler; - drmmode->event_context.page_flip_handler = radeon_drm_queue_handler; - pRADEONEnt->has_page_flip_target = drmmode_probe_page_flip_target(pRADEONEnt); drmModeFreeResources(mode_res); diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c index c36e06f22..4d12fc09a 100644 --- a/src/radeon_dri2.c +++ b/src/radeon_dri2.c @@ -968,13 +968,15 @@ CARD32 radeon_dri2_deferred_event(OsTimerPtr timer, CARD32 now, pointer data) scrn = crtc->scrn; pRADEONEnt = RADEONEntPriv(scrn); +drmmode_crtc = event_info->crtc->driver_private; ret = drmmode_get_current_ust(pRADEONEnt->fd, _now); if (ret) { xf86DrvMsg(scrn->scrnIndex, X_ERROR, "%s cannot get current time\n", __func__); if (event_info->drm_queue_seq) - radeon_drm_queue_handler(pRADEONEnt->fd, 0, 0, 0, -(void*)event_info->drm_queue_seq); + drmmode_crtc->drmmode->event_context. + vblank_handler(pRADEONEnt->fd, 0, 0, 0, + (void*)event_info->drm_queue_seq); else radeon_dri2_frame_event_handler(crtc, 0, 0, data); return 0; @@ -983,15 +985,15 @@ CARD32 radeon_dri2_deferred_event(OsTimerPtr timer, CARD32 now, pointer data) * calculate the frame number from current time * that would come from CRTC if it were running */ -drmmode_crtc = event_info->crtc->driver_private; delta_t = drm_now - (CARD64)drmmode_crtc->dpms_last_ust; delta_seq = delta_t * drmmode_crtc->dpms_last_fps; delta_seq /= 100; frame = (CARD64)drmmode_crtc->dpms_last_seq + delta_seq; if (event_info->drm_queue_seq) - radeon_drm_queue_handler(pRADEONEnt->fd, frame, drm_now / 100, -drm_now % 100, -(void*)event_info->drm_queue_seq); + drmmode_crtc->drmmode->event_context. + vblank_handler(pRADEONEnt->fd, frame, drm_now / 100, + drm_now % 100, + (void*)event_info->drm_queue_seq); else radeon_dri2_frame_event_handler(crtc, frame, drm_now, data); return 0; diff --git a/src/radeon_drm_queue.c b/src/radeon_drm_queue.c index ac775f86a..bff010fa3 100644 --- a/src/radeon_drm_queue.c +++ b/src/radeon_drm_queue.c @@ -57,7 +57,7 @@ static uintptr_t radeon_drm_queue_seq; /* * Handle a DRM event */ -void +static void radeon_drm_queue_handler(int fd, unsigned int frame, unsigned int sec, unsigned int usec, void *user_ptr) { @@ -181,8 +181,15 @@ radeon_drm_abort_id(uint64_t id) * Initialize the DRM event queue */ void -radeon_drm_queue_init() +radeon_drm_queue_init(ScrnInfoPtr scrn) { +RADEONInfoPtr info = RADEONPTR(scrn); +drmmode_ptr drmmode = >drmmode; + +drmmode->event_context.version = 2; +drmmode->event_context.vblank_handler = radeon_drm_queue_handler; +drmmode->event_context.page_flip_handler = radeon_drm_queue_handler; + if (radeon_drm_queue_refcnt++) return; diff --git a/src/radeon_drm_queue.h b/src/radeon_drm_queue.h index c3e2076db..b6aab37c5 100644 --- a/src/radeon_drm_queue.h +++ b/src/radeon_drm_queue.h @@ -40,9 +40,6 @@ typedef void (*radeon_drm_handler_proc)(xf86CrtcPtr crtc, uint32_t seq, uint64_t usec, void *data); typedef void (*radeon_drm_abort_proc)(xf86CrtcPtr crtc, void *data); -void radeon_drm_queue_handler(int fd, unsigned int frame, - unsigned int tv_sec, unsigned int tv_usec, - void *user_ptr); uintptr_t radeon_drm_queue_alloc(xf86CrtcPtr crtc, ClientPtr client, uint64_t id, void *data, radeon_drm_handler_proc handler, @@ -50,7 +47,7 @@ uintptr_t radeon_drm_queue_alloc(xf86CrtcPtr crtc, ClientPtr client, void radeon_drm_abort_client(ClientPtr client); void radeon_drm_abort_entry(uintptr_t seq); void radeon_drm_abort_id(uint64_t id); -void radeon_drm_queue_init(); +void
[PATCH xf86-video-ati 4/5] Add radeon_drm_wait_pending_flip function
From: Michel Dänzer Replacing the drmmode_crtc_wait_pending_event macro. (Ported from amdgpu commit 6029794e8a35417faf825491a89b85f713c77fc1) Signed-off-by: Michel Dänzer --- src/drmmode_display.c | 18 -- src/drmmode_display.h | 4 src/radeon_drm_queue.c | 41 +++-- src/radeon_drm_queue.h | 1 + 4 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/drmmode_display.c b/src/drmmode_display.c index c022b3e4f..a55cd08a0 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -324,6 +324,9 @@ drmmode_do_crtc_dpms(xf86CrtcPtr crtc, int mode) nominal_frame_rate /= pix_in_frame; drmmode_crtc->dpms_last_fps = nominal_frame_rate; } + + drmmode_crtc->dpms_mode = mode; + radeon_drm_queue_handle_deferred(crtc); } else if (drmmode_crtc->dpms_mode != DPMSModeOn && mode == DPMSModeOn) { /* * Off->On transition: calculate and accumulate the @@ -341,8 +344,9 @@ drmmode_do_crtc_dpms(xf86CrtcPtr crtc, int mode) drmmode_crtc->interpolated_vblanks += delta_seq; } + + drmmode_crtc->dpms_mode = DPMSModeOn; } - drmmode_crtc->dpms_mode = mode; } static void @@ -972,6 +976,7 @@ done: } } + radeon_drm_queue_handle_deferred(crtc); return ret; } @@ -1763,11 +1768,6 @@ drmmode_output_set_tear_free(RADEONEntPtr pRADEONEnt, drmmode_output->tear_free = tear_free; if (crtc) { - /* Wait for pending flips before drmmode_set_mode_major calls -* drmmode_crtc_update_tear_free, to prevent a nested -* drmHandleEvent call, which would hang -*/ - radeon_drm_wait_pending_flip(crtc); drmmode_set_mode_major(crtc, >mode, crtc->rotation, crtc->x, crtc->y); } @@ -3278,6 +3278,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr client, drmmode_crtc_private_ptr drmmode_crtc = config->crtc[0]->driver_private; uint32_t flip_flags = flip_sync == FLIP_ASYNC ? DRM_MODE_PAGE_FLIP_ASYNC : 0; drmmode_flipdata_ptr flipdata; + Bool handle_deferred = FALSE; uintptr_t drm_queue_seq = 0; struct drmmode_fb *fb; int i = 0; @@ -3360,6 +3361,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr client, if (drmmode_crtc->scanout_update_pending) { radeon_drm_wait_pending_flip(crtc); + handle_deferred = TRUE; radeon_drm_abort_entry(drmmode_crtc->scanout_update_pending); drmmode_crtc->scanout_update_pending = 0; } @@ -3395,6 +3397,8 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr client, drm_queue_seq = 0; } + if (handle_deferred) + radeon_drm_queue_handle_deferred(ref_crtc); if (flipdata->flip_count > 0) return TRUE; @@ -3414,5 +3418,7 @@ error: xf86DrvMsg(scrn->scrnIndex, X_WARNING, "Page flip failed: %s\n", strerror(errno)); + if (handle_deferred) + radeon_drm_queue_handle_deferred(ref_crtc); return FALSE; } diff --git a/src/drmmode_display.h b/src/drmmode_display.h index 46449c8e3..a039bf8fb 100644 --- a/src/drmmode_display.h +++ b/src/drmmode_display.h @@ -103,6 +103,10 @@ typedef struct { * modeset) */ Bool need_modeset; +/* For keeping track of nested calls to drm_wait_pending_flip / + * drm_queue_handle_deferred + */ +int wait_flip_nesting_level; /* A flip to this FB is pending for this CRTC */ struct drmmode_fb *flip_pending; /* The FB currently being scanned out by this CRTC, if any */ diff --git a/src/radeon_drm_queue.c b/src/radeon_drm_queue.c index 3d2f4d157..857278fdd 100644 --- a/src/radeon_drm_queue.c +++ b/src/radeon_drm_queue.c @@ -117,6 +117,30 @@ radeon_drm_vblank_handler(int fd, unsigned int frame, unsigned int sec, user_ptr); } +/* + * Handle deferred DRM vblank events + * + * This function must be called after radeon_drm_wait_pending_flip, once + * it's safe to attempt queueing a flip again + */ +void +radeon_drm_queue_handle_deferred(xf86CrtcPtr crtc) +{ +drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; +struct radeon_drm_queue_entry *e, *tmp; + +if (drmmode_crtc->wait_flip_nesting_level == 0 || + --drmmode_crtc->wait_flip_nesting_level > 0) + return; + +xorg_list_for_each_entry_safe(e, tmp, _drm_vblank_signalled, list) { + drmmode_crtc_private_ptr drmmode_crtc = e->crtc->driver_private; + + if (drmmode_crtc->wait_flip_nesting_level == 0) +
[PATCH xf86-video-ati 3/5] Add radeon_drm_handle_event wrapper for drmHandleEvent
From: Michel Dänzer Instead of processing DRM events directly from drmHandleEvent's callbacks, there are three phases: 1. drmHandleEvent is called, and signalled events are re-queued to _signalled lists from its callbacks. 2. Signalled page flip completion events are processed. 3. Signalled vblank events are processed. This should make sure that we never call drmHandleEvent from one of its callbacks, which would usually result in blocking forever. (Ported from amdgpu commit 739181c8d3334ff14b5a607895dfdeb29b0d9020) Signed-off-by: Michel Dänzer --- src/drmmode_display.c | 13 ++--- src/radeon_drm_queue.c | 110 - src/radeon_drm_queue.h | 1 + src/radeon_present.c | 2 +- 4 files changed, 96 insertions(+), 30 deletions(-) diff --git a/src/drmmode_display.c b/src/drmmode_display.c index 59fe9b7fc..c022b3e4f 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -2587,9 +2587,8 @@ static void drm_wakeup_handler(pointer data, int err, pointer p) #endif { - ScrnInfoPtr scrn = data; - RADEONEntPtr pRADEONEnt = RADEONEntPriv(scrn); - RADEONInfoPtr info = RADEONPTR(scrn); + drmmode_ptr drmmode = data; + RADEONEntPtr pRADEONEnt = RADEONEntPriv(drmmode->scrn); #if !HAVE_NOTIFY_FD fd_set *read_mask = p; @@ -2597,7 +2596,7 @@ drm_wakeup_handler(pointer data, int err, pointer p) if (err >= 0 && FD_ISSET(pRADEONEnt->fd, read_mask)) #endif { - drmHandleEvent(pRADEONEnt->fd, >drmmode.event_context); + radeon_drm_handle_event(pRADEONEnt->fd, >event_context); } } @@ -2747,11 +2746,13 @@ void drmmode_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode) info->drmmode_inited = TRUE; if (pRADEONEnt->fd_wakeup_registered != serverGeneration) { #if HAVE_NOTIFY_FD - SetNotifyFd(pRADEONEnt->fd, drm_notify_fd, X_NOTIFY_READ, pScrn); + SetNotifyFd(pRADEONEnt->fd, drm_notify_fd, X_NOTIFY_READ, + >drmmode); #else AddGeneralSocket(pRADEONEnt->fd); RegisterBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA, - drm_wakeup_handler, pScrn); + drm_wakeup_handler, + >drmmode); #endif pRADEONEnt->fd_wakeup_registered = serverGeneration; pRADEONEnt->fd_wakeup_ref = 1; diff --git a/src/radeon_drm_queue.c b/src/radeon_drm_queue.c index 69474be24..3d2f4d157 100644 --- a/src/radeon_drm_queue.c +++ b/src/radeon_drm_queue.c @@ -40,6 +40,7 @@ struct radeon_drm_queue_entry { struct xorg_list list; +uint64_t usec; uint64_t id; uintptr_t seq; void *data; @@ -47,36 +48,73 @@ struct radeon_drm_queue_entry { xf86CrtcPtr crtc; radeon_drm_handler_proc handler; radeon_drm_abort_proc abort; +unsigned int frame; }; static int radeon_drm_queue_refcnt; static struct xorg_list radeon_drm_queue; +static struct xorg_list radeon_drm_flip_signalled; +static struct xorg_list radeon_drm_vblank_signalled; static uintptr_t radeon_drm_queue_seq; /* - * Handle a DRM event + * Process a DRM event */ static void -radeon_drm_queue_handler(int fd, unsigned int frame, unsigned int sec, -unsigned int usec, void *user_ptr) +radeon_drm_queue_handle_one(struct radeon_drm_queue_entry *e) { - uintptr_t seq = (uintptr_t)user_ptr; - struct radeon_drm_queue_entry *e, *tmp; - - xorg_list_for_each_entry_safe(e, tmp, _drm_queue, list) { - if (e->seq == seq) { - xorg_list_del(>list); - if (e->handler) - e->handler(e->crtc, frame, - (uint64_t)sec * 100 + usec, - e->data); - else - e->abort(e->crtc, e->data); - free(e); - break; - } +xorg_list_del(>list); +if (e->handler) { + e->handler(e->crtc, e->frame, e->usec, e->data); +} else + e->abort(e->crtc, e->data); +free(e); +} + +static void +radeon_drm_queue_handler(struct xorg_list *signalled, unsigned int frame, +unsigned int sec, unsigned int usec, void *user_ptr) +{ +uintptr_t seq = (uintptr_t)user_ptr; +struct radeon_drm_queue_entry *e, *tmp; + +xorg_list_for_each_entry_safe(e, tmp, _drm_queue, list) { + if (e->seq == seq) { + if (!e->handler) { + e->abort(e->crtc, e->data); + break; + } + + xorg_list_del(>list); + e->usec = (uint64_t)sec * 100 + usec; + e->frame = frame; + xorg_list_append(>list, signalled); + break; } +} +} + +/* + * Signal a DRM page flip
[PATCH xf86-video-ati 2/5] Add radeon_drm_wait_pending_flip function
From: Michel Dänzer Replacing the drmmode_crtc_wait_pending_event macro. (Ported from amdgpu commit 6029794e8a35417faf825491a89b85f713c77fc1) Signed-off-by: Michel Dänzer --- src/drmmode_display.c | 21 - src/radeon_drm_queue.c | 13 + src/radeon_drm_queue.h | 1 + 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/drmmode_display.c b/src/drmmode_display.c index 2e8f91d6e..59fe9b7fc 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -100,13 +100,6 @@ RADEONZaphodStringMatches(ScrnInfoPtr pScrn, const char *s, char *output_name) } -/* Wait for the boolean condition to be FALSE */ -#define drmmode_crtc_wait_pending_event(drmmode_crtc, fd, condition) \ - do {} while ((condition) && \ -drmHandleEvent(fd, _crtc->drmmode->event_context) \ -> 0); - - static PixmapPtr drmmode_create_bo_pixmap(ScrnInfoPtr pScrn, int width, int height, int depth, int bpp, @@ -306,8 +299,7 @@ drmmode_do_crtc_dpms(xf86CrtcPtr crtc, int mode) if (drmmode_crtc->dpms_mode == DPMSModeOn && mode != DPMSModeOn) { uint32_t seq; - drmmode_crtc_wait_pending_event(drmmode_crtc, pRADEONEnt->fd, - drmmode_crtc->flip_pending); + radeon_drm_wait_pending_flip(crtc); /* * On->Off transition: record the last vblank time, @@ -918,8 +910,7 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, goto done; } - drmmode_crtc_wait_pending_event(drmmode_crtc, pRADEONEnt->fd, - drmmode_crtc->flip_pending); + radeon_drm_wait_pending_flip(crtc); if (!drmmode_set_mode(crtc, fb, mode, x, y)) goto done; @@ -1772,14 +1763,11 @@ drmmode_output_set_tear_free(RADEONEntPtr pRADEONEnt, drmmode_output->tear_free = tear_free; if (crtc) { - drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; - /* Wait for pending flips before drmmode_set_mode_major calls * drmmode_crtc_update_tear_free, to prevent a nested * drmHandleEvent call, which would hang */ - drmmode_crtc_wait_pending_event(drmmode_crtc, pRADEONEnt->fd, - drmmode_crtc->flip_pending); + radeon_drm_wait_pending_flip(crtc); drmmode_set_mode_major(crtc, >mode, crtc->rotation, crtc->x, crtc->y); } @@ -3370,8 +3358,7 @@ Bool radeon_do_pageflip(ScrnInfoPtr scrn, ClientPtr client, radeon_cs_flush_indirect(crtc->scrn); if (drmmode_crtc->scanout_update_pending) { - drmmode_crtc_wait_pending_event(drmmode_crtc, pRADEONEnt->fd, - drmmode_crtc->flip_pending); + radeon_drm_wait_pending_flip(crtc); radeon_drm_abort_entry(drmmode_crtc->scanout_update_pending); drmmode_crtc->scanout_update_pending = 0; } diff --git a/src/radeon_drm_queue.c b/src/radeon_drm_queue.c index bff010fa3..69474be24 100644 --- a/src/radeon_drm_queue.c +++ b/src/radeon_drm_queue.c @@ -177,6 +177,19 @@ radeon_drm_abort_id(uint64_t id) } } +/* + * Wait for pending page flip on given CRTC to complete + */ +void radeon_drm_wait_pending_flip(xf86CrtcPtr crtc) +{ +drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; +RADEONEntPtr pRADEONEnt = RADEONEntPriv(crtc->scrn); +drmmode_ptr drmmode = drmmode_crtc->drmmode; + +while (drmmode_crtc->flip_pending && + drmHandleEvent(pRADEONEnt->fd, >event_context) > 0); +} + /* * Initialize the DRM event queue */ diff --git a/src/radeon_drm_queue.h b/src/radeon_drm_queue.h index b6aab37c5..96f88bd35 100644 --- a/src/radeon_drm_queue.h +++ b/src/radeon_drm_queue.h @@ -47,6 +47,7 @@ uintptr_t radeon_drm_queue_alloc(xf86CrtcPtr crtc, ClientPtr client, void radeon_drm_abort_client(ClientPtr client); void radeon_drm_abort_entry(uintptr_t seq); void radeon_drm_abort_id(uint64_t id); +void radeon_drm_wait_pending_flip(xf86CrtcPtr crtc); void radeon_drm_queue_init(ScrnInfoPtr scrn); void radeon_drm_queue_close(ScrnInfoPtr scrn); -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 0/5] drm/nouveau+amdgpu: add DP CEC-Tunneling-over-AUX
On Thu, Aug 16, 2018 at 6:56 AM Hans Verkuil wrote: > > From: Hans Verkuil > > Now that the DisplayPort CEC-Tunneling-over-AUX drm+i915 support > has been merged in the mainline kernel it is time to roll this > out to nouveau and amdgpu as well. > > I combined both in the same patch series since both depend on the > same first patch, the comments in this cover letter apply to both > and the implementation is also very similar (and simple). > > As mentioned, the first patch is required for this: it adds checks that > the drm_dp_cec functions are called with a working aux implementation. > These checks weren't necessary for the i915, but nouveau and amdgpu > require them. > > The next two patches update a comment in drm_dp_cec.c and fix a bug > in sideband AUX handling that I found while researching CEC Tunneling > over an MST hub. It's there to prevent others from stumbling over the > same bug in the future. > > The fourth patch adds support for CEC to the nouveau driver. > > The last patch adds support for CEC to the amdgpu driver. However, there > appear to be two classes of amdgpu hardware: as a discrete GPU or > integrated. I only have a discrete GPU, so I can't test the integrated > GPU support and I only implemented this for the discrete GPU case. > > If someone has the integrated GPU and wants to get this working and is > willing to do some testing, then please contact me. It shouldn't be > difficult. You will likely have to buy a working DP-to-HDMI adapter. > See https://hverkuil.home.xs4all.nl/cec-status.txt for a (sadly very > short) list of working adapters. Actually you added support for APUs as well. In amdgpu, there are two sets of modesetting code, an older less featured version (amd/amdgpu/dce*.c) and the newer more featured code (amd/display/*). Newer asics (vega and raven) are only supported by DC. Older asics are supported by both. Eventually we'd like to remove the older modesetting code. I'm not really a CEC expert, but the patches look pretty straight forward to me. Series is: Acked-by: Alex Deucher > > Note that I may be completely off-base regarding what atombios_dp.c > does, it's the first time I ever looked at amdgpu code. > > Two notes on CEC-Tunneling-over-AUX: > > 1) You need to be very careful about which USB-C/DP/mini-DP to HDMI >adapters you buy. Only a few support this feature correctly today. >Known chipsets that support this are Parade PS175 & PS176 and >MegaChips 2900. Unfortunately almost all Parade-based adapters >do not hook up the HDMI CEC pin to the chip, making them useless >for this. The Parade reference design was only recently changed >to hook up this pin, so perhaps this situation will change for >new Parade-based adapters. > >Adapters based on the new MegaChips 2900 fare much better: it >appears that their reference design *does* hook up the CEC pin. >Club3D has adapters using this device for USB-C, DP and mini-DP >to HDMI, and they all work fine. > >If anyone finds other adapters that work besides those I list >in https://hverkuil.home.xs4all.nl/cec-status.txt, please let >me know and I'll add them to the list. > >Linux is the first OS that supports this feature, so I am >not surprised that HW support for this has been poor. Hopefully >this will change going forward. BTW, note the irony that CEC is >now supported for DP-to-HDMI adapters, but not for the native >HDMI ports on NVIDIA/AMD/Intel GPUs. > > 2) CEC-Tunneling does not work (yet?) if there is an MST hub in >between. I'm still researching this but this might be a limitation >of MST. > > Regards, > > Hans > > Hans Verkuil (5): > drm_dp_cec: check that aux has a transfer function > drm_dp_cec: add note about good MegaChips 2900 CEC support > drm_dp_mst_topology: fix broken > drm_dp_sideband_parse_remote_dpcd_read() > drm/nouveau: add DisplayPort CEC-Tunneling-over-AUX support > drm/amdgpu: add DisplayPort CEC-Tunneling-over-AUX support > > .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 +++-- > .../display/amdgpu_dm/amdgpu_dm_mst_types.c| 2 ++ > drivers/gpu/drm/drm_dp_cec.c | 18 +- > drivers/gpu/drm/drm_dp_mst_topology.c | 1 + > drivers/gpu/drm/nouveau/nouveau_connector.c| 17 +++-- > 5 files changed, 46 insertions(+), 5 deletions(-) > > -- > 2.18.0 > > ___ > dri-devel mailing list > dri-de...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH xf86-video-amdgpu] Use correct FB handle in amdgpu_do_pageflip
On Thu, Aug 16, 2018 at 10:36 AM Michel Dänzer wrote: > > From: Michel Dänzer > > We were always using the handle of the client provided FB, which > prevented RandR transforms from working, and could result in a black > screen. > > Fixes: 9b6782c821e0 "Store FB for each CRTC in drmmode_flipdata_rec" > Signed-off-by: Michel Dänzer Good catch! Reviewed-by: Alex Deucher > --- > src/drmmode_display.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/src/drmmode_display.c b/src/drmmode_display.c > index e58e15d7b..be0e6b875 100644 > --- a/src/drmmode_display.c > +++ b/src/drmmode_display.c > @@ -3974,7 +3974,7 @@ Bool amdgpu_do_pageflip(ScrnInfoPtr scrn, ClientPtr > client, > if (crtc == ref_crtc) { > if (drmmode_page_flip_target_absolute(pAMDGPUEnt, > drmmode_crtc, > - fb->handle, > + > flipdata->fb[i]->handle, > flip_flags, > drm_queue_seq, > target_msc) != > 0) > @@ -3982,7 +3982,7 @@ Bool amdgpu_do_pageflip(ScrnInfoPtr scrn, ClientPtr > client, > } else { > if (drmmode_page_flip_target_relative(pAMDGPUEnt, > drmmode_crtc, > - fb->handle, > + > flipdata->fb[i]->handle, > flip_flags, > drm_queue_seq, > 0) != 0) > goto flip_error; > -- > 2.18.0 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH xf86-video-amdgpu] Store FB for each CRTC in drmmode_flipdata_rec
On 2018-08-10 09:06 AM, Johannes Hirte wrote: > On 2018 Jul 27, Michel Dänzer wrote: >> From: Michel Dänzer >> >> We were only storing the FB provided by the client, but on CRTCs with >> TearFree enabled, we use a separate FB. This could cause >> drmmode_flip_handler to fail to clear drmmode_crtc->flip_pending, which >> could result in a hang when waiting for the pending flip to complete. We >> were trying to avoid that by always clearing drmmode_crtc->flip_pending >> when TearFree is enabled, but that wasn't reliable, because >> drmmode_crtc->tear_free can already be FALSE at this point when >> disabling TearFree. >> >> Now that we're keeping track of each CRTC's flip FB separately, >> drmmode_flip_handler can reliably clear flip_pending, and we no longer >> need the TearFree hack. >> >> Signed-off-by: Michel Dänzer > > Since this change I get a black screen when login into KDE Plasma. I > have to switch to linux console and back for getting the X11 screen. > Additional the Xorg.log is spammed with: > > [ 189.744] (WW) AMDGPU(0): get vblank counter failed: Invalid argument > [ 189.828] (WW) AMDGPU(0): flip queue failed in amdgpu_scanout_flip: Device > or resource busy, TearFree inactive until next modeset > [ 189.828] (WW) AMDGPU(0): drmmode_wait_vblank failed for scanout update: > Invalid argument > [ 189.828] (WW) AMDGPU(0): drmmode_wait_vblank failed for scanout update: > Invalid argument > > The "flip queue failed" message appears only once, the other two are > much more often. > > System is a Carrizo A10-8700B, kernel 4.17.13 + this patch: > https://bugzilla.kernel.org/attachment.cgi?id=276173 Does https://patchwork.freedesktop.org/patch/244860/ fix it? -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH xf86-video-amdgpu] Use correct FB handle in amdgpu_do_pageflip
From: Michel Dänzer We were always using the handle of the client provided FB, which prevented RandR transforms from working, and could result in a black screen. Fixes: 9b6782c821e0 "Store FB for each CRTC in drmmode_flipdata_rec" Signed-off-by: Michel Dänzer --- src/drmmode_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/drmmode_display.c b/src/drmmode_display.c index e58e15d7b..be0e6b875 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -3974,7 +3974,7 @@ Bool amdgpu_do_pageflip(ScrnInfoPtr scrn, ClientPtr client, if (crtc == ref_crtc) { if (drmmode_page_flip_target_absolute(pAMDGPUEnt, drmmode_crtc, - fb->handle, + flipdata->fb[i]->handle, flip_flags, drm_queue_seq, target_msc) != 0) @@ -3982,7 +3982,7 @@ Bool amdgpu_do_pageflip(ScrnInfoPtr scrn, ClientPtr client, } else { if (drmmode_page_flip_target_relative(pAMDGPUEnt, drmmode_crtc, - fb->handle, + flipdata->fb[i]->handle, flip_flags, drm_queue_seq, 0) != 0) goto flip_error; -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH libdrm 2/2] amdgpu: Use char* pointers in amdgpu_find_bo_by_cpu_mapping
On 2018-08-15 03:07 AM, Zhang, Jerry (Junwei) wrote: > On 08/14/2018 05:58 PM, Michel Dänzer wrote: >> From: Michel Dänzer >> >> Arithmetic using void* pointers isn't defined by the C standard, only as >> a GCC extension. Avoids compiler warnings: >> >> ../../amdgpu/amdgpu_bo.c: In function ‘amdgpu_find_bo_by_cpu_mapping’: >> ../../amdgpu/amdgpu_bo.c:554:48: warning: pointer of type ‘void *’ >> used in arithmetic [-Wpointer-arith] >> if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size)) >> ^ >> ../../amdgpu/amdgpu_bo.c:561:23: warning: pointer of type ‘void *’ >> used in subtraction [-Wpointer-arith] >> *offset_in_bo = cpu - bo->cpu_ptr; >> ^ >> >> Fixes: 4d454424e1f2 ("amdgpu: add a function to find bo by cpu mapping >> (v2)") >> Signed-off-by: Michel Dänzer >> --- >> amdgpu/amdgpu.h | 2 +- >> amdgpu/amdgpu_bo.c | 7 --- >> 2 files changed, 5 insertions(+), 4 deletions(-) >> >> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h >> index a8c353c6..f2bdeb95 100644 >> --- a/amdgpu/amdgpu.h >> +++ b/amdgpu/amdgpu.h >> @@ -695,7 +695,7 @@ int >> amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev, >> * >> */ >> int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev, >> - void *cpu, >> + char *cpu, > > Shall we cast the cpu pointer when do arithmetic and keep the arguments > as other cpu ponter? > > e.g. > @@ -565,14 +565,15 @@ int > amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev, > bo = handle_table_lookup(>bo_handles, i); > if (!bo || !bo->cpu_ptr || size > bo->alloc_size) > continue; > - if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + > bo->alloc_size)) > + if (cpu >= bo->cpu_ptr && > + (uint64_t)cpu < ((uint64_t)bo->cpu_ptr + > bo->alloc_size)) > break; > } > > if (i < dev->bo_handles.max_key) { > atomic_inc(>refcount); > *buf_handle = bo; > - *offset_in_bo = cpu - bo->cpu_ptr; > + *offset_in_bo = (uint64_t)cpu - (uint64_t)bo->cpu_ptr; > } else { > *buf_handle = NULL; > *offset_in_bo = 0; Thanks for the suggestion, I sent out a v2 patch along those lines (though slightly different). -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH v2 libdrm] amdgpu: Eliminate void* arithmetic in amdgpu_find_bo_by_cpu_mapping
From: Michel Dänzer Arithmetic using void* pointers isn't defined by the C standard, only as a GCC extension. Avoids compiler warnings: ../../amdgpu/amdgpu_bo.c: In function ‘amdgpu_find_bo_by_cpu_mapping’: ../../amdgpu/amdgpu_bo.c:554:48: warning: pointer of type ‘void *’ used in arithmetic [-Wpointer-arith] if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size)) ^ ../../amdgpu/amdgpu_bo.c:561:23: warning: pointer of type ‘void *’ used in subtraction [-Wpointer-arith] *offset_in_bo = cpu - bo->cpu_ptr; ^ v2: Use uintptr_t instead of char*, don't change function signature (Junwei Zhang) Fixes: 4d454424e1f2 ("amdgpu: add a function to find bo by cpu mapping (v2)") Signed-off-by: Michel Dänzer --- amdgpu/amdgpu_bo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c index 86d1c143..8efd014e 100644 --- a/amdgpu/amdgpu_bo.c +++ b/amdgpu/amdgpu_bo.c @@ -551,14 +551,15 @@ int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev, bo = handle_table_lookup(>bo_handles, i); if (!bo || !bo->cpu_ptr || size > bo->alloc_size) continue; - if (cpu >= bo->cpu_ptr && cpu < (bo->cpu_ptr + bo->alloc_size)) + if (cpu >= bo->cpu_ptr && + cpu < (void*)((uintptr_t)bo->cpu_ptr + bo->alloc_size)) break; } if (i < dev->bo_handles.max_key) { atomic_inc(>refcount); *buf_handle = bo; - *offset_in_bo = cpu - bo->cpu_ptr; + *offset_in_bo = (uintptr_t)cpu - (uintptr_t)bo->cpu_ptr; } else { *buf_handle = NULL; *offset_in_bo = 0; -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu/display: add support for LVDS (v5)
This adds support for LVDS displays. v2: add support for spread spectrum, sink detect v3: clean up enable_lvds_output v4: fix up link_detect v5: remove assert on 888 format Bug: https://bugs.freedesktop.org/show_bug.cgi?id=105880 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 + drivers/gpu/drm/amd/display/dc/core/dc_link.c | 45 ++ .../gpu/drm/amd/display/dc/dce/dce_clock_source.c | 10 + .../gpu/drm/amd/display/dc/dce/dce_clock_source.h | 2 + .../gpu/drm/amd/display/dc/dce/dce_link_encoder.c | 34 .../gpu/drm/amd/display/dc/dce/dce_link_encoder.h | 6 +++ .../drm/amd/display/dc/dce/dce_stream_encoder.c| 24 .../gpu/drm/amd/display/dc/inc/hw/link_encoder.h | 3 ++ .../gpu/drm/amd/display/dc/inc/hw/stream_encoder.h | 4 ++ drivers/gpu/drm/amd/display/include/signal_types.h | 5 +++ 10 files changed, 135 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1828e4382b24..818b5ec32f37 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3360,6 +3360,8 @@ static int to_drm_connector_type(enum signal_type st) return DRM_MODE_CONNECTOR_HDMIA; case SIGNAL_TYPE_EDP: return DRM_MODE_CONNECTOR_eDP; + case SIGNAL_TYPE_LVDS: + return DRM_MODE_CONNECTOR_LVDS; case SIGNAL_TYPE_RGB: return DRM_MODE_CONNECTOR_VGA; case SIGNAL_TYPE_DISPLAY_PORT: diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 981f7cbd31cc..0f044fd5baf4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -203,6 +203,11 @@ static bool detect_sink(struct dc_link *link, enum dc_connection_type *type) uint32_t is_hpd_high = 0; struct gpio *hpd_pin; + if (link->connector_signal == SIGNAL_TYPE_LVDS) { + *type = dc_connection_single; + return true; + } + /* todo: may need to lock gpio access */ hpd_pin = get_hpd_gpio(link->ctx->dc_bios, link->link_id, link->ctx->gpio_service); if (hpd_pin == NULL) @@ -616,6 +621,10 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) link->local_sink) return true; + if (link->connector_signal == SIGNAL_TYPE_LVDS && + link->local_sink) + return true; + prev_sink = link->local_sink; if (prev_sink != NULL) { dc_sink_retain(prev_sink); @@ -649,6 +658,12 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) break; } + case SIGNAL_TYPE_LVDS: { + sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C; + sink_caps.signal = SIGNAL_TYPE_LVDS; + break; + } + case SIGNAL_TYPE_EDP: { detect_edp_sink_caps(link); sink_caps.transaction_type = @@ -1087,6 +1102,9 @@ static bool construct( dal_irq_get_rx_source(hpd_gpio); } break; + case CONNECTOR_ID_LVDS: + link->connector_signal = SIGNAL_TYPE_LVDS; + break; default: DC_LOG_WARNING("Unsupported Connector type:%d!\n", link->link_id.id); goto create_fail; @@ -1920,6 +1938,24 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) dal_ddc_service_read_scdc_data(link->ddc); } +static void enable_link_lvds(struct pipe_ctx *pipe_ctx) +{ + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->sink->link; + + if (stream->phy_pix_clk == 0) + stream->phy_pix_clk = stream->timing.pix_clk_khz; + + memset(>sink->link->cur_link_settings, 0, + sizeof(struct dc_link_settings)); + + link->link_enc->funcs->enable_lvds_output( + link->link_enc, + pipe_ctx->clock_source->id, + stream->phy_pix_clk); + +} + /enable_link***/ static enum dc_status enable_link( struct dc_state *state, @@ -1943,6 +1979,10 @@ static enum dc_status enable_link( enable_link_hdmi(pipe_ctx); status = DC_OK; break; + case SIGNAL_TYPE_LVDS: + enable_link_lvds(pipe_ctx); + status = DC_OK; + break; case SIGNAL_TYPE_VIRTUAL: status = DC_OK; break; @@ -2492,6 +2532,11 @@ void
Re: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb
Am 16.08.2018 um 15:05 schrieb Emily Deng: To avoid the tlb flush not interrupted by world switch, use kiq and one command to do tlb invalidate. Signed-off-by: Emily Deng --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 -- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 60 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6265b88..19ef7711 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_LAST }; +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ +#define MAX_KIQ_REG_TRY 20 + int amdgpu_device_ip_set_clockgating_state(void *dev, enum amd_ip_block_type block_type, enum amd_clockgating_state state); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 21adb1b6..3885636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,9 +22,6 @@ */ #include "amdgpu.h" -#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ -#define MAX_KIQ_REG_TRY 20 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index ed467de..6c164bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -311,6 +311,58 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) return req; } +signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = >gfx.kiq; + struct amdgpu_ring *ring = >ring; + + if (!ring->ready) + return -EINVAL; + + spin_lock_irqsave(>ring_lock, flags); + + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, + ref, mask); + amdgpu_fence_emit_polling(ring, ); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(>ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may +* block gpu_recover() routine forever, e.g. this virt_kiq_rreg +* is triggered in TTM and ttm_bo_lock_delayed_workqueue() will +* never return if we keep waiting in virt_kiq_rreg, which cause +* gpu_recover() hang there. +* +* also don't wait anymore for IRQ context +* */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq; + + might_sleep(); + + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq; + + return 0; + +failed_kiq: + pr_err("failed to invalidate tlb with kiq\n"); + return r; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, /* Use register 17 for GART */ const unsigned eng = 17; unsigned i, j; + int r; spin_lock(>gmc.invalidate_lock); @@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, struct amdgpu_vmhub *hub = >vmhub[i]; u32 tmp = gmc_v9_0_get_invalidate_req(vmid); + spin_unlock(>gmc.invalidate_lock); Well just remove the lock altogether. Taking it and dropping it again immediately doesn't do anything useful. + r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid); + spin_lock(>gmc.invalidate_lock); + if (!r) + continue; + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* Busy wait for ACK.*/ Since you now use the KIQ to wait for the TLB flush I think we can now remove the MMIO implementation. Regards, Christian. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amdgpu: Remove the sriov checking and add firmware checking
Am 16.08.2018 um 15:05 schrieb Emily Deng: Unify bare metal and sriov, and add firmware checking for reg write and reg wait unify command. Signed-off-by: Emily Deng --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 47 - 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 53e9e2a..f172e92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -274,6 +274,8 @@ struct amdgpu_gfx { uint32_trlc_srls_feature_version; uint32_tmec_feature_version; uint32_tmec2_feature_version; + boolmec_fw_write_wait; + boolme_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; unsignednum_gfx_rings; struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 76d979e..76e8973 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -482,6 +482,47 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); } +static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) +{ + adev->gfx.me_fw_write_wait = false; + adev->gfx.mec_fw_write_wait = false; + + switch (adev->asic_type) { + case CHIP_VEGA10: + if ((adev->gfx.me_fw_version >= 0x009c) && (adev->gfx.me_feature_version >= 42) && + (adev->gfx.pfp_fw_version >= 0x00b1) && (adev->gfx.pfp_feature_version >= 42)) + adev->gfx.me_fw_write_wait = true; Well that looks like the correct solution to me. Only a nit pick left to fix, the coding style looks a bit off. For example the "if" should be indented so that "(adev.." in both lines is on the same column: if ((adev ) && (adev And the "adev->gfx.me_fw_write_wait = true;" is to far to the right. With that fixed the patch is Acked-by: Christian König . Regards, Christian. + + if ((adev->gfx.mec_fw_version >= 0x0193) && (adev->gfx.mec_feature_version >= 42)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_VEGA12: + if ((adev->gfx.me_fw_version >= 0x009c) && (adev->gfx.me_feature_version >= 44) && + (adev->gfx.pfp_fw_version >= 0x00b2) && (adev->gfx.pfp_feature_version >= 44)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x0196) && (adev->gfx.mec_feature_version >= 44)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_VEGA20: + if ((adev->gfx.me_fw_version >= 0x009c) && (adev->gfx.me_feature_version >= 44) && + (adev->gfx.pfp_fw_version >= 0x00b2) && (adev->gfx.pfp_feature_version >= 44)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x0197) && (adev->gfx.mec_feature_version >= 44)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_RAVEN: + if ((adev->gfx.me_fw_version >= 0x009c) && (adev->gfx.me_feature_version >= 42) && + (adev->gfx.pfp_fw_version >= 0x00b1) && (adev->gfx.pfp_feature_version >= 42)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x0192) && (adev->gfx.mec_feature_version >= 42)) + adev->gfx.mec_fw_write_wait = true; + break; + } +} + static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -716,6 +757,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) } out: + gfx_v9_0_check_fw_write_wait(adev); if (err) { dev_err(adev->dev, "gfx9: Failed to load firmware \"%s\"\n", @@ -4348,8 +4390,11 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, uint32_t ref, uint32_t mask) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + struct amdgpu_device *adev = ring->adev; + bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? + adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; - if
[PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb
To avoid the tlb flush not interrupted by world switch, use kiq and one command to do tlb invalidate. Signed-off-by: Emily Deng --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 -- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 60 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6265b88..19ef7711 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_LAST }; +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ +#define MAX_KIQ_REG_TRY 20 + int amdgpu_device_ip_set_clockgating_state(void *dev, enum amd_ip_block_type block_type, enum amd_clockgating_state state); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 21adb1b6..3885636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,9 +22,6 @@ */ #include "amdgpu.h" -#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ -#define MAX_KIQ_REG_TRY 20 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index ed467de..6c164bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -311,6 +311,58 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) return req; } +signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = >gfx.kiq; + struct amdgpu_ring *ring = >ring; + + if (!ring->ready) + return -EINVAL; + + spin_lock_irqsave(>ring_lock, flags); + + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, + ref, mask); + amdgpu_fence_emit_polling(ring, ); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(>ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may +* block gpu_recover() routine forever, e.g. this virt_kiq_rreg +* is triggered in TTM and ttm_bo_lock_delayed_workqueue() will +* never return if we keep waiting in virt_kiq_rreg, which cause +* gpu_recover() hang there. +* +* also don't wait anymore for IRQ context +* */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq; + + might_sleep(); + + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq; + + return 0; + +failed_kiq: + pr_err("failed to invalidate tlb with kiq\n"); + return r; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, /* Use register 17 for GART */ const unsigned eng = 17; unsigned i, j; + int r; spin_lock(>gmc.invalidate_lock); @@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, struct amdgpu_vmhub *hub = >vmhub[i]; u32 tmp = gmc_v9_0_get_invalidate_req(vmid); + spin_unlock(>gmc.invalidate_lock); + r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid); + spin_lock(>gmc.invalidate_lock); + if (!r) + continue; + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* Busy wait for ACK.*/ -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdgpu: Remove the sriov checking and add firmware checking
Unify bare metal and sriov, and add firmware checking for reg write and reg wait unify command. Signed-off-by: Emily Deng --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 47 - 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 53e9e2a..f172e92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -274,6 +274,8 @@ struct amdgpu_gfx { uint32_trlc_srls_feature_version; uint32_tmec_feature_version; uint32_tmec2_feature_version; + boolmec_fw_write_wait; + boolme_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; unsignednum_gfx_rings; struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 76d979e..76e8973 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -482,6 +482,47 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); } +static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) +{ + adev->gfx.me_fw_write_wait = false; + adev->gfx.mec_fw_write_wait = false; + + switch (adev->asic_type) { + case CHIP_VEGA10: + if ((adev->gfx.me_fw_version >= 0x009c) && (adev->gfx.me_feature_version >= 42) && + (adev->gfx.pfp_fw_version >= 0x00b1) && (adev->gfx.pfp_feature_version >= 42)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x0193) && (adev->gfx.mec_feature_version >= 42)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_VEGA12: + if ((adev->gfx.me_fw_version >= 0x009c) && (adev->gfx.me_feature_version >= 44) && + (adev->gfx.pfp_fw_version >= 0x00b2) && (adev->gfx.pfp_feature_version >= 44)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x0196) && (adev->gfx.mec_feature_version >= 44)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_VEGA20: + if ((adev->gfx.me_fw_version >= 0x009c) && (adev->gfx.me_feature_version >= 44) && + (adev->gfx.pfp_fw_version >= 0x00b2) && (adev->gfx.pfp_feature_version >= 44)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x0197) && (adev->gfx.mec_feature_version >= 44)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_RAVEN: + if ((adev->gfx.me_fw_version >= 0x009c) && (adev->gfx.me_feature_version >= 42) && + (adev->gfx.pfp_fw_version >= 0x00b1) && (adev->gfx.pfp_feature_version >= 42)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x0192) && (adev->gfx.mec_feature_version >= 42)) + adev->gfx.mec_fw_write_wait = true; + break; + } +} + static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -716,6 +757,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) } out: + gfx_v9_0_check_fw_write_wait(adev); if (err) { dev_err(adev->dev, "gfx9: Failed to load firmware \"%s\"\n", @@ -4348,8 +4390,11 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, uint32_t ref, uint32_t mask) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + struct amdgpu_device *adev = ring->adev; + bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? + adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; - if (amdgpu_sriov_vf(ring->adev)) + if (fw_version_ok) gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, ref, mask, 0x20); else -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: remove fulll access for suspend phase1
On Thu, Aug 16, 2018 at 4:26 AM Yintian Tao wrote: > > There is no need for gpu full access for suspend phase1 > because under virtualization there is no hw register access > for dce block. > > Change-Id: Ie1154e2065182ba968732af87f866f11141a102b > Signed-off-by: Yintian Tao Acked-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 -- > 1 file changed, 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index d84ac23..5da20b1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -1968,9 +1968,6 @@ static int amdgpu_device_ip_suspend_phase1(struct > amdgpu_device *adev) > { > int i, r; > > - if (amdgpu_sriov_vf(adev)) > - amdgpu_virt_request_full_gpu(adev, false); > - > for (i = adev->num_ip_blocks - 1; i >= 0; i--) { > if (!adev->ip_blocks[i].status.valid) > continue; > @@ -1995,9 +1992,6 @@ static int amdgpu_device_ip_suspend_phase1(struct > amdgpu_device *adev) > } > } > > - if (amdgpu_sriov_vf(adev)) > - amdgpu_virt_release_full_gpu(adev, false); > - > return 0; > } > > -- > 2.7.4 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/5] drm_dp_cec: add note about good MegaChips 2900 CEC support
From: Hans Verkuil A big problem with DP CEC-Tunneling-over-AUX is that it is tricky to find adapters with a chipset that supports this AND where the manufacturer actually connected the HDMI CEC line to the chipset. Add a mention of the MegaChips 2900 chipset which seems to support this feature well. Signed-off-by: Hans Verkuil --- drivers/gpu/drm/drm_dp_cec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_cec.c b/drivers/gpu/drm/drm_dp_cec.c index 1407b13a8d5d..8a718f85079a 100644 --- a/drivers/gpu/drm/drm_dp_cec.c +++ b/drivers/gpu/drm/drm_dp_cec.c @@ -16,7 +16,9 @@ * here. Quite a few active (mini-)DP-to-HDMI or USB-C-to-HDMI adapters * have a converter chip that supports CEC-Tunneling-over-AUX (usually the * Parade PS176), but they do not wire up the CEC pin, thus making CEC - * useless. + * useless. Note that MegaChips 2900-based adapters appear to have good + * support for CEC tunneling. Those adapters that I have tested using + * this chipset all have the CEC line connected. * * Sadly there is no way for this driver to know this. What happens is * that a /dev/cecX device is created that is isolated and unable to see -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/5] drm/nouveau: add DisplayPort CEC-Tunneling-over-AUX support
From: Hans Verkuil Add DisplayPort CEC-Tunneling-over-AUX support to nouveau. Signed-off-by: Hans Verkuil --- drivers/gpu/drm/nouveau/nouveau_connector.c | 17 +++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 51932c72334e..eb4f766b5958 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -400,8 +400,10 @@ nouveau_connector_destroy(struct drm_connector *connector) kfree(nv_connector->edid); drm_connector_unregister(connector); drm_connector_cleanup(connector); - if (nv_connector->aux.transfer) + if (nv_connector->aux.transfer) { + drm_dp_cec_unregister_connector(_connector->aux); drm_dp_aux_unregister(_connector->aux); + } kfree(connector); } @@ -608,6 +610,7 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) nouveau_connector_set_encoder(connector, nv_encoder); conn_status = connector_status_connected; + drm_dp_cec_set_edid(_connector->aux, nv_connector->edid); goto out; } @@ -1108,11 +,14 @@ nouveau_connector_hotplug(struct nvif_notify *notify) if (rep->mask & NVIF_NOTIFY_CONN_V0_IRQ) { NV_DEBUG(drm, "service %s\n", name); + drm_dp_cec_irq(_connector->aux); if ((nv_encoder = find_encoder(connector, DCB_OUTPUT_DP))) nv50_mstm_service(nv_encoder->dp.mstm); } else { bool plugged = (rep->mask != NVIF_NOTIFY_CONN_V0_UNPLUG); + if (!plugged) + drm_dp_cec_unset_edid(_connector->aux); NV_DEBUG(drm, "%splugged %s\n", plugged ? "" : "un", name); if ((nv_encoder = find_encoder(connector, DCB_OUTPUT_DP))) { if (!plugged) @@ -1302,7 +1308,6 @@ nouveau_connector_create(struct drm_device *dev, int index) kfree(nv_connector); return ERR_PTR(ret); } - funcs = _connector_funcs; break; default: @@ -1356,6 +1361,14 @@ nouveau_connector_create(struct drm_device *dev, int index) break; } + switch (type) { + case DRM_MODE_CONNECTOR_DisplayPort: + case DRM_MODE_CONNECTOR_eDP: + drm_dp_cec_register_connector(_connector->aux, + connector->name, dev->dev); + break; + } + ret = nvif_notify_init(>disp.object, nouveau_connector_hotplug, true, NV04_DISP_NTFY_CONN, &(struct nvif_notify_conn_req_v0) { -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/5] drm_dp_mst_topology: fix broken drm_dp_sideband_parse_remote_dpcd_read()
From: Hans Verkuil When parsing the reply of a DP_REMOTE_DPCD_READ DPCD command the result is wrong due to a missing idx increment. This was never noticed since DP_REMOTE_DPCD_READ is currently not used, but if you enable it, then it is all wrong. Signed-off-by: Hans Verkuil --- drivers/gpu/drm/drm_dp_mst_topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 7780567aa669..5ff1d79b86c4 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -439,6 +439,7 @@ static bool drm_dp_sideband_parse_remote_dpcd_read(struct drm_dp_sideband_msg_rx if (idx > raw->curlen) goto fail_len; repmsg->u.remote_dpcd_read_ack.num_bytes = raw->msg[idx]; + idx++; if (idx > raw->curlen) goto fail_len; -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 5/5] drm/amdgpu: add DisplayPort CEC-Tunneling-over-AUX support
From: Hans Verkuil Add DisplayPort CEC-Tunneling-over-AUX support to amdgpu. Signed-off-by: Hans Verkuil --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 +++-- .../drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 ++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 34f34823bab5..77898c95bef6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -898,6 +898,7 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector) aconnector->dc_sink = sink; if (sink->dc_edid.length == 0) { aconnector->edid = NULL; + drm_dp_cec_unset_edid(>dm_dp_aux.aux); } else { aconnector->edid = (struct edid *) sink->dc_edid.raw_edid; @@ -905,10 +906,13 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector) drm_connector_update_edid_property(connector, aconnector->edid); + drm_dp_cec_set_edid(>dm_dp_aux.aux, + aconnector->edid); } amdgpu_dm_add_sink_to_freesync_module(connector, aconnector->edid); } else { + drm_dp_cec_unset_edid(>dm_dp_aux.aux); amdgpu_dm_remove_sink_from_freesync_module(connector); drm_connector_update_edid_property(connector, NULL); aconnector->num_modes = 0; @@ -1059,12 +1063,16 @@ static void handle_hpd_rx_irq(void *param) drm_kms_helper_hotplug_event(dev); } } + if ((dc_link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) || - (dc_link->type == dc_connection_mst_branch)) + (dc_link->type == dc_connection_mst_branch)) { dm_handle_hpd_rx_irq(aconnector); + } - if (dc_link->type != dc_connection_mst_branch) + if (dc_link->type != dc_connection_mst_branch) { + drm_dp_cec_irq(>dm_dp_aux.aux); mutex_unlock(>hpd_lock); + } } static void register_hpd_handlers(struct amdgpu_device *adev) @@ -2732,6 +2740,7 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) dm->backlight_dev = NULL; } #endif + drm_dp_cec_unregister_connector(>dm_dp_aux.aux); drm_connector_unregister(connector); drm_connector_cleanup(connector); kfree(connector); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 9a300732ba37..18a3a6e5ffa0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -496,6 +496,8 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, aconnector->dm_dp_aux.ddc_service = aconnector->dc_link->ddc; drm_dp_aux_register(>dm_dp_aux.aux); + drm_dp_cec_register_connector(>dm_dp_aux.aux, + aconnector->base.name, dm->adev->dev); aconnector->mst_mgr.cbs = _mst_cbs; drm_dp_mst_topology_mgr_init( >mst_mgr, -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 0/5] drm/nouveau+amdgpu: add DP CEC-Tunneling-over-AUX
From: Hans Verkuil Now that the DisplayPort CEC-Tunneling-over-AUX drm+i915 support has been merged in the mainline kernel it is time to roll this out to nouveau and amdgpu as well. I combined both in the same patch series since both depend on the same first patch, the comments in this cover letter apply to both and the implementation is also very similar (and simple). As mentioned, the first patch is required for this: it adds checks that the drm_dp_cec functions are called with a working aux implementation. These checks weren't necessary for the i915, but nouveau and amdgpu require them. The next two patches update a comment in drm_dp_cec.c and fix a bug in sideband AUX handling that I found while researching CEC Tunneling over an MST hub. It's there to prevent others from stumbling over the same bug in the future. The fourth patch adds support for CEC to the nouveau driver. The last patch adds support for CEC to the amdgpu driver. However, there appear to be two classes of amdgpu hardware: as a discrete GPU or integrated. I only have a discrete GPU, so I can't test the integrated GPU support and I only implemented this for the discrete GPU case. If someone has the integrated GPU and wants to get this working and is willing to do some testing, then please contact me. It shouldn't be difficult. You will likely have to buy a working DP-to-HDMI adapter. See https://hverkuil.home.xs4all.nl/cec-status.txt for a (sadly very short) list of working adapters. Note that I may be completely off-base regarding what atombios_dp.c does, it's the first time I ever looked at amdgpu code. Two notes on CEC-Tunneling-over-AUX: 1) You need to be very careful about which USB-C/DP/mini-DP to HDMI adapters you buy. Only a few support this feature correctly today. Known chipsets that support this are Parade PS175 & PS176 and MegaChips 2900. Unfortunately almost all Parade-based adapters do not hook up the HDMI CEC pin to the chip, making them useless for this. The Parade reference design was only recently changed to hook up this pin, so perhaps this situation will change for new Parade-based adapters. Adapters based on the new MegaChips 2900 fare much better: it appears that their reference design *does* hook up the CEC pin. Club3D has adapters using this device for USB-C, DP and mini-DP to HDMI, and they all work fine. If anyone finds other adapters that work besides those I list in https://hverkuil.home.xs4all.nl/cec-status.txt, please let me know and I'll add them to the list. Linux is the first OS that supports this feature, so I am not surprised that HW support for this has been poor. Hopefully this will change going forward. BTW, note the irony that CEC is now supported for DP-to-HDMI adapters, but not for the native HDMI ports on NVIDIA/AMD/Intel GPUs. 2) CEC-Tunneling does not work (yet?) if there is an MST hub in between. I'm still researching this but this might be a limitation of MST. Regards, Hans Hans Verkuil (5): drm_dp_cec: check that aux has a transfer function drm_dp_cec: add note about good MegaChips 2900 CEC support drm_dp_mst_topology: fix broken drm_dp_sideband_parse_remote_dpcd_read() drm/nouveau: add DisplayPort CEC-Tunneling-over-AUX support drm/amdgpu: add DisplayPort CEC-Tunneling-over-AUX support .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 +++-- .../display/amdgpu_dm/amdgpu_dm_mst_types.c| 2 ++ drivers/gpu/drm/drm_dp_cec.c | 18 +- drivers/gpu/drm/drm_dp_mst_topology.c | 1 + drivers/gpu/drm/nouveau/nouveau_connector.c| 17 +++-- 5 files changed, 46 insertions(+), 5 deletions(-) -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/5] drm_dp_cec: check that aux has a transfer function
From: Hans Verkuil If aux->transfer == NULL, then just return without doing anything. In that case the function is likely called for a non-(e)DP connector. This never happened for the i915 driver, but the nouveau and amdgpu drivers need this check. The alternative would be to add this check in those drivers before every drm_dp_cec call, but it makes sense to check it in the drm_dp_cec functions to prevent a kernel oops. Signed-off-by: Hans Verkuil --- drivers/gpu/drm/drm_dp_cec.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_cec.c b/drivers/gpu/drm/drm_dp_cec.c index 988513346e9c..1407b13a8d5d 100644 --- a/drivers/gpu/drm/drm_dp_cec.c +++ b/drivers/gpu/drm/drm_dp_cec.c @@ -238,6 +238,10 @@ void drm_dp_cec_irq(struct drm_dp_aux *aux) u8 cec_irq; int ret; + /* No transfer function was set, so not a DP connector */ + if (!aux->transfer) + return; + mutex_lock(>cec.lock); if (!aux->cec.adap) goto unlock; @@ -293,6 +297,10 @@ void drm_dp_cec_set_edid(struct drm_dp_aux *aux, const struct edid *edid) unsigned int num_las = 1; u8 cap; + /* No transfer function was set, so not a DP connector */ + if (!aux->transfer) + return; + #ifndef CONFIG_MEDIA_CEC_RC /* * CEC_CAP_RC is part of CEC_CAP_DEFAULTS, but it is stripped by @@ -361,6 +369,10 @@ EXPORT_SYMBOL(drm_dp_cec_set_edid); */ void drm_dp_cec_unset_edid(struct drm_dp_aux *aux) { + /* No transfer function was set, so not a DP connector */ + if (!aux->transfer) + return; + cancel_delayed_work_sync(>cec.unregister_work); mutex_lock(>cec.lock); @@ -404,6 +416,8 @@ void drm_dp_cec_register_connector(struct drm_dp_aux *aux, const char *name, struct device *parent) { WARN_ON(aux->cec.adap); + if (WARN_ON(!aux->transfer)) + return; aux->cec.name = name; aux->cec.parent = parent; INIT_DELAYED_WORK(>cec.unregister_work, -- 2.18.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu: rework ctx entity creation
On 2018年08月16日 16:11, Christian König wrote: Am 16.08.2018 um 04:07 schrieb zhoucm1: On 2018年08月15日 18:59, Christian König wrote: Use a fixed number of entities for each hardware IP. The number of compute entities is reduced to four, SDMA keeps it two entities and all other engines just expose one entity. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 291 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 30 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 36 ++-- 3 files changed, 190 insertions(+), 167 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 0a6cd1202ee5..987b7f256463 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -27,8 +27,29 @@ #include "amdgpu.h" #include "amdgpu_sched.h" -#define to_amdgpu_ctx_ring(e) \ - container_of((e), struct amdgpu_ctx_ring, entity) +#define to_amdgpu_ctx_entity(e) \ + container_of((e), struct amdgpu_ctx_entity, entity) + +const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = { + [AMDGPU_HW_IP_GFX] = 1, + [AMDGPU_HW_IP_COMPUTE] = 4, Could you explain why reduct to four? otherwise it looks good to me. Currently we change the priority of the compute queues on the fly, but the idea is that we will have fixed high priority and low priority compute queues in the future. Yeah, I see that, feel free to add my RB: Reviewed-by: Chunming Zhou Regards, David Zhou We could as well say we have only 2 or 3 if the closed stack is fine with that. Regards, Christian. Thanks, David Zhou + [AMDGPU_HW_IP_DMA] = 2, + [AMDGPU_HW_IP_UVD] = 1, + [AMDGPU_HW_IP_VCE] = 1, + [AMDGPU_HW_IP_UVD_ENC] = 1, + [AMDGPU_HW_IP_VCN_DEC] = 1, + [AMDGPU_HW_IP_VCN_ENC] = 1, +}; + +static int amdgput_ctx_total_num_entities(void) +{ + unsigned i, num_entities = 0; + + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) + num_entities += amdgpu_ctx_num_entities[i]; + + return num_entities; +} static int amdgpu_ctx_priority_permit(struct drm_file *filp, enum drm_sched_priority priority) @@ -51,9 +72,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct drm_file *filp, struct amdgpu_ctx *ctx) { - struct drm_sched_rq *sdma_rqs[AMDGPU_MAX_RINGS]; - struct drm_sched_rq *comp_rqs[AMDGPU_MAX_RINGS]; - unsigned i, j, num_sdma_rqs, num_comp_rqs; + unsigned num_entities = amdgput_ctx_total_num_entities(); + unsigned i, j; int r; if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) @@ -65,19 +85,33 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; - kref_init(>refcount); - spin_lock_init(>ring_lock); - ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS, + + ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities, sizeof(struct dma_fence*), GFP_KERNEL); if (!ctx->fences) return -ENOMEM; - mutex_init(>lock); + ctx->entities[0] = kcalloc(num_entities, + sizeof(struct amdgpu_ctx_entity), + GFP_KERNEL); + if (!ctx->entities[0]) { + r = -ENOMEM; + goto error_free_fences; + } - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - ctx->rings[i].sequence = 1; - ctx->rings[i].fences = >fences[amdgpu_sched_jobs * i]; + for (i = 0; i < num_entities; ++i) { + struct amdgpu_ctx_entity *entity = >entities[0][i]; + + entity->sequence = 1; + entity->fences = >fences[amdgpu_sched_jobs * i]; } + for (i = 1; i < AMDGPU_HW_IP_NUM; ++i) + ctx->entities[i] = ctx->entities[i - 1] + + amdgpu_ctx_num_entities[i - 1]; + + kref_init(>refcount); + spin_lock_init(>ring_lock); + mutex_init(>lock); ctx->reset_counter = atomic_read(>gpu_reset_counter); ctx->reset_counter_query = ctx->reset_counter; @@ -85,50 +119,70 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, ctx->init_priority = priority; ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; - num_sdma_rqs = 0; - num_comp_rqs = 0; - for (i = 0; i < adev->num_rings; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - struct drm_sched_rq *rq; - - rq = >sched.sched_rq[priority]; - if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA) - sdma_rqs[num_sdma_rqs++] = rq; - else if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) - comp_rqs[num_comp_rqs++] = rq; - } - - /* create context entity for each ring */ - for (i = 0; i < adev->num_rings; i++) { - struct amdgpu_ring *ring = adev->rings[i]; + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; + struct drm_sched_rq
[PATCH] drm/amdgpu: remove fulll access for suspend phase1
There is no need for gpu full access for suspend phase1 because under virtualization there is no hw register access for dce block. Change-Id: Ie1154e2065182ba968732af87f866f11141a102b Signed-off-by: Yintian Tao --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d84ac23..5da20b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1968,9 +1968,6 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) { int i, r; - if (amdgpu_sriov_vf(adev)) - amdgpu_virt_request_full_gpu(adev, false); - for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) continue; @@ -1995,9 +1992,6 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) } } - if (amdgpu_sriov_vf(adev)) - amdgpu_virt_release_full_gpu(adev, false); - return 0; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu: rework ctx entity creation
Am 16.08.2018 um 04:07 schrieb zhoucm1: On 2018年08月15日 18:59, Christian König wrote: Use a fixed number of entities for each hardware IP. The number of compute entities is reduced to four, SDMA keeps it two entities and all other engines just expose one entity. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 291 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 30 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 36 ++-- 3 files changed, 190 insertions(+), 167 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 0a6cd1202ee5..987b7f256463 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -27,8 +27,29 @@ #include "amdgpu.h" #include "amdgpu_sched.h" -#define to_amdgpu_ctx_ring(e) \ - container_of((e), struct amdgpu_ctx_ring, entity) +#define to_amdgpu_ctx_entity(e) \ + container_of((e), struct amdgpu_ctx_entity, entity) + +const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = { + [AMDGPU_HW_IP_GFX] = 1, + [AMDGPU_HW_IP_COMPUTE] = 4, Could you explain why reduct to four? otherwise it looks good to me. Currently we change the priority of the compute queues on the fly, but the idea is that we will have fixed high priority and low priority compute queues in the future. We could as well say we have only 2 or 3 if the closed stack is fine with that. Regards, Christian. Thanks, David Zhou + [AMDGPU_HW_IP_DMA] = 2, + [AMDGPU_HW_IP_UVD] = 1, + [AMDGPU_HW_IP_VCE] = 1, + [AMDGPU_HW_IP_UVD_ENC] = 1, + [AMDGPU_HW_IP_VCN_DEC] = 1, + [AMDGPU_HW_IP_VCN_ENC] = 1, +}; + +static int amdgput_ctx_total_num_entities(void) +{ + unsigned i, num_entities = 0; + + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) + num_entities += amdgpu_ctx_num_entities[i]; + + return num_entities; +} static int amdgpu_ctx_priority_permit(struct drm_file *filp, enum drm_sched_priority priority) @@ -51,9 +72,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct drm_file *filp, struct amdgpu_ctx *ctx) { - struct drm_sched_rq *sdma_rqs[AMDGPU_MAX_RINGS]; - struct drm_sched_rq *comp_rqs[AMDGPU_MAX_RINGS]; - unsigned i, j, num_sdma_rqs, num_comp_rqs; + unsigned num_entities = amdgput_ctx_total_num_entities(); + unsigned i, j; int r; if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) @@ -65,19 +85,33 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; - kref_init(>refcount); - spin_lock_init(>ring_lock); - ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS, + + ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities, sizeof(struct dma_fence*), GFP_KERNEL); if (!ctx->fences) return -ENOMEM; - mutex_init(>lock); + ctx->entities[0] = kcalloc(num_entities, + sizeof(struct amdgpu_ctx_entity), + GFP_KERNEL); + if (!ctx->entities[0]) { + r = -ENOMEM; + goto error_free_fences; + } - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - ctx->rings[i].sequence = 1; - ctx->rings[i].fences = >fences[amdgpu_sched_jobs * i]; + for (i = 0; i < num_entities; ++i) { + struct amdgpu_ctx_entity *entity = >entities[0][i]; + + entity->sequence = 1; + entity->fences = >fences[amdgpu_sched_jobs * i]; } + for (i = 1; i < AMDGPU_HW_IP_NUM; ++i) + ctx->entities[i] = ctx->entities[i - 1] + + amdgpu_ctx_num_entities[i - 1]; + + kref_init(>refcount); + spin_lock_init(>ring_lock); + mutex_init(>lock); ctx->reset_counter = atomic_read(>gpu_reset_counter); ctx->reset_counter_query = ctx->reset_counter; @@ -85,50 +119,70 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, ctx->init_priority = priority; ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; - num_sdma_rqs = 0; - num_comp_rqs = 0; - for (i = 0; i < adev->num_rings; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - struct drm_sched_rq *rq; - - rq = >sched.sched_rq[priority]; - if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA) - sdma_rqs[num_sdma_rqs++] = rq; - else if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) - comp_rqs[num_comp_rqs++] = rq; - } - - /* create context entity for each ring */ - for (i = 0; i < adev->num_rings; i++) { - struct amdgpu_ring *ring = adev->rings[i]; + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; + struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS]; + unsigned num_rings; + + switch (i) { + case AMDGPU_HW_IP_GFX: + rings[0] =
[PATCH] drm/amdpgu: access register without KIQ
there is no need to access register such as mmSMC_IND_INDEX_11 and mmSMC_IND_DATA_11 through KIQ because they are VF-copy. Change-Id: I88302dc5945e0b0fa2e6411081fb798aab4fdb5e Signed-off-by: Yintian Tao Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 29fbb91..5a30bf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -112,8 +112,8 @@ static u32 vi_smc_rreg(struct amdgpu_device *adev, u32 reg) u32 r; spin_lock_irqsave(>smc_idx_lock, flags); - WREG32(mmSMC_IND_INDEX_11, (reg)); - r = RREG32(mmSMC_IND_DATA_11); + WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg)); + r = RREG32_NO_KIQ(mmSMC_IND_DATA_11); spin_unlock_irqrestore(>smc_idx_lock, flags); return r; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq to do invalidate tlb
Thanks all, I am modifying the patch, and testing. Best wishes Emily Deng >-Original Message- >From: Zhu, Rex >Sent: Thursday, August 16, 2018 2:25 PM >To: 'Alex Deucher' >Cc: Deng, Emily ; 'amd-gfx list' g...@lists.freedesktop.org> >Subject: RE: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq to do >invalidate tlb > > > >> -Original Message- >> From: Zhu, Rex >> Sent: Thursday, August 16, 2018 1:41 PM >> To: 'Alex Deucher' >> Cc: Deng, Emily ; amd-gfx list > g...@lists.freedesktop.org> >> Subject: RE: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq >> to do invalidate tlb >> >> >> >> > -Original Message- >> > From: Alex Deucher >> > Sent: Wednesday, August 15, 2018 10:14 PM >> > To: Zhu, Rex >> > Cc: Deng, Emily ; amd-gfx list > > g...@lists.freedesktop.org> >> > Subject: Re: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use >> > kiq to do invalidate tlb >> > >> > On Wed, Aug 15, 2018 at 9:56 AM Zhu, Rex wrote: >> > > >> > > >> > > >> > > > -Original Message- >> > > > From: amd-gfx On Behalf >> > > > Of Emily Deng >> > > > Sent: Wednesday, August 15, 2018 5:48 PM >> > > > To: amd-gfx@lists.freedesktop.org >> > > > Cc: Deng, Emily >> > > > Subject: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use >> > > > kiq to do invalidate tlb >> > > > >> > > > To avoid the tlb flush not interrupted by world switch, use kiq >> > > > and one command to do tlb invalidate. >> > > > >> > > > v2: >> > > > Add firmware version checking. >> > > > >> > > > v3: >> > > > Refine the code, and move the firmware checking into >> > > > gfx_v9_0_ring_emit_reg_write_reg_wait. >> > > > >> > > > SWDEV-161497 >> > > >> > > The "SWDEV-161497" is meanless. >> > > you can describe the issue or just remove the bug number. >> > > >> > > > >> > > > Signed-off-by: Emily Deng >> > > > --- >> > > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++ >> > > > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 -- >> > > > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 15 +++- >> > > > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 61 >> > > > >> > > > 4 files changed, 79 insertions(+), 4 deletions(-) >> > > > >> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> > > > index 07924d4..67b584b 100644 >> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> > > > @@ -210,6 +210,10 @@ enum amdgpu_kiq_irq { >> > > > AMDGPU_CP_KIQ_IRQ_LAST >> > > > }; >> > > > >> > > > +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ >> > > > +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ >> > > > +#define MAX_KIQ_REG_TRY 20 >> > > > + >> > > > int amdgpu_device_ip_set_clockgating_state(void *dev, >> > > > enum amd_ip_block_type >> > > > block_type, >> > > > enum >> > > > amd_clockgating_state state); diff --git >> > > > a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> > > > index 21adb1b6..3885636 100644 >> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> > > > @@ -22,9 +22,6 @@ >> > > > */ >> > > > >> > > > #include "amdgpu.h" >> > > > -#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ >> > > > -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ - >> > #define >> > > > MAX_KIQ_REG_TRY 20 >> > > > >> > > > uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { diff >> > > > --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> > > > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> > > > index 76d979e..c9b3db4 100644 >> > > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> > > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> > > > @@ -4348,8 +4348,21 @@ static void >> > > > gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, >> > > > uint32_t ref, >> > > > uint32_t mask) { >> > > > int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); >> > > > + struct amdgpu_device *adev = ring->adev; >> > > > + bool fw_version_ok = false; >> > > > >> > > > - if (amdgpu_sriov_vf(ring->adev)) >> > > > + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { >> > > > + if ((adev->gfx.me_fw_version >= 0x009c) && >> > > > + (adev- >> > > > >gfx.me_feature_version >= 42)) >> > > > + if ((adev->gfx.pfp_fw_version >= >> > > > + 0x00b1) && >> > > > (adev->gfx.pfp_feature_version >= 42)) >> > > > + fw_version_ok = true; >> > > > + } else { >> > > > + if ((adev->gfx.mec_fw_version >= 0x0193) && >> > > > + (adev- >> > > > >gfx.mec_feature_version >= 42)) >> > > > + fw_version_ok = true; >> > > > + } >> > > >> > > Maybe we can add a flag and set the flag when request_firmware. >> > >> > I
Re: [PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence
Am 15.08.2018 um 20:49 schrieb Felix Kuehling: On 2018-08-15 02:27 PM, Christian König wrote: Am 15.08.2018 um 20:17 schrieb Felix Kuehling: On 2018-08-15 03:02 AM, Christian König wrote: Hi Felix, yeah, you pretty much nailed it. The problem is that the array itself is RCU protected. This means that you can only copy the whole structure when you want to update it. The exception is reservation_object_add_shared() which only works because we replace an either signaled fence or a fence within the same context but a later sequence number. This also explains why this is only a band aid and the whole approach of removing fences doesn't work in general. At any time somebody could have taken an RCU reference to the old array, created a copy of it and is now still using this one. The only 100% correct solution would be to mark the existing fence as signaled and replace it everywhere else. Depends what you're trying to achieve. I think the problem you see is, that some reader may still be using the old reservation_object_list copy with the fence still in it. But, the remaining lifetime of the reservation_object_list copy is limited. If we wanted to be sure no more copies with the old fence exist, all we'd need to do is call synchronize_rcu. Maybe we need to do that before releasing the fence references, or release the fence reference in an RCU callback to be safe. The assumption that the fence would die with the array is what is incorrect here. I'm making no such assumption. The point is not to destroy the fence. Only to remove the fence reference from the reservation object. That is, we want any BO with this reservation object to stop checking or waiting for our eviction fence. Maybe I'm overthinking this, but let me explain the point once more. See reservation_object_wait_timeout_rcu() for an example of the problem I see here: seq = read_seqcount_begin(>seq); rcu_read_lock(); if (!dma_fence_get_rcu(lfence)) goto unlock_retry; ... rcu_read_unlock(); ... if (read_seqcount_retry(>seq, seq)) { dma_fence_put(fence); goto retry; } ... ret = dma_fence_wait_timeout(fence, intr, ret); In other words the RCU mechanism guarantees that we also wait for newly added fences, but it does not guarantee that we don't wait for a fence which is already removed. The lifetime of RCUed array object is limit, but there is absolutely no guarantee that somebody didn't made a copy of the fences. E.g. somebody could have called reservation_object_get_fences_rcu(), reservation_object_copy_fences() or a concurrent reservation_object_wait_timeout_rcu() is underway. That's fine. Then there will be additional fence references. When we remove a fence from a reservation object, we don't know and don't care who else is holding a reference to the fence anyway. This is no different. So the KFD implementation doesn't care that the fence is removed from a BO but somebody could still start to wait on it because of the BO? I mean it could be that in the worst case we race and stop a KFD process for no good reason. Regards, Christian. Regards, Felix That's also the reason why fences live for much longer than their signaling, e.g. somebody can have a reference to the fence object even hours after it is signaled. Regards, Christian. Regards, Felix Going to fix the copy error I made with the sequence number and send it out again. Regards, Christian. Am 14.08.2018 um 22:17 schrieb Felix Kuehling: [+Harish] I think this looks good for the most part. See one comment inline below. But bear with me while I'm trying to understand what was wrong with the old code. Please correct me if I get this wrong or point out anything I'm missing. The reservation_object_list looks to be protected by a combination of three mechanism: * Holding the reservation object * RCU * seqcount Updating the fence list requires holding the reservation object. But there are some readers that can be called without holding that lock (reservation_object_copy_fences, reservation_object_get_fences_rcu, reservation_object_wait_timeout_rcu, reservation_object_test_signaled_rcu). They rely on RCU to work on a copy and seqcount to make sure they had the most up-to-date information. So any function updating the fence lists needs to do RCU and seqcount correctly to prevent breaking those readers. As I understand it, RCU with seqcount retry just means that readers will spin retrying while there are writers, and writers are never blocked by readers. Writers are blocked by each other, because they need to hold the reservation. The code you added looks a lot like reservation_object_add_shared_replace, which removes fences that have signalled, and atomically replaces obj->fences with a new reservation_fence_list. That atomicity is important
RE: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq to do invalidate tlb
> -Original Message- > From: Zhu, Rex > Sent: Thursday, August 16, 2018 1:41 PM > To: 'Alex Deucher' > Cc: Deng, Emily ; amd-gfx list g...@lists.freedesktop.org> > Subject: RE: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq to do > invalidate tlb > > > > > -Original Message- > > From: Alex Deucher > > Sent: Wednesday, August 15, 2018 10:14 PM > > To: Zhu, Rex > > Cc: Deng, Emily ; amd-gfx list > g...@lists.freedesktop.org> > > Subject: Re: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq > > to do invalidate tlb > > > > On Wed, Aug 15, 2018 at 9:56 AM Zhu, Rex wrote: > > > > > > > > > > > > > -Original Message- > > > > From: amd-gfx On Behalf Of > > > > Emily Deng > > > > Sent: Wednesday, August 15, 2018 5:48 PM > > > > To: amd-gfx@lists.freedesktop.org > > > > Cc: Deng, Emily > > > > Subject: [PATCH 1/2] drm/amdgpu/sriov: For sriov runtime, use kiq > > > > to do invalidate tlb > > > > > > > > To avoid the tlb flush not interrupted by world switch, use kiq > > > > and one command to do tlb invalidate. > > > > > > > > v2: > > > > Add firmware version checking. > > > > > > > > v3: > > > > Refine the code, and move the firmware checking into > > > > gfx_v9_0_ring_emit_reg_write_reg_wait. > > > > > > > > SWDEV-161497 > > > > > > The "SWDEV-161497" is meanless. > > > you can describe the issue or just remove the bug number. > > > > > > > > > > > Signed-off-by: Emily Deng > > > > --- > > > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++ > > > > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 -- > > > > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 15 +++- > > > > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 61 > > > > > > > > 4 files changed, 79 insertions(+), 4 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > > > index 07924d4..67b584b 100644 > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > > > @@ -210,6 +210,10 @@ enum amdgpu_kiq_irq { > > > > AMDGPU_CP_KIQ_IRQ_LAST > > > > }; > > > > > > > > +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ > > > > +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ > > > > +#define MAX_KIQ_REG_TRY 20 > > > > + > > > > int amdgpu_device_ip_set_clockgating_state(void *dev, > > > > enum amd_ip_block_type > > > > block_type, > > > > enum > > > > amd_clockgating_state state); diff --git > > > > a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > > > > index 21adb1b6..3885636 100644 > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > > > > @@ -22,9 +22,6 @@ > > > > */ > > > > > > > > #include "amdgpu.h" > > > > -#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ > > > > -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ - > > #define > > > > MAX_KIQ_REG_TRY 20 > > > > > > > > uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { diff > > > > --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > > > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > > > index 76d979e..c9b3db4 100644 > > > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > > > @@ -4348,8 +4348,21 @@ static void > > > > gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, > > > > uint32_t ref, > > > > uint32_t mask) { > > > > int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); > > > > + struct amdgpu_device *adev = ring->adev; > > > > + bool fw_version_ok = false; > > > > > > > > - if (amdgpu_sriov_vf(ring->adev)) > > > > + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { > > > > + if ((adev->gfx.me_fw_version >= 0x009c) && > > > > + (adev- > > > > >gfx.me_feature_version >= 42)) > > > > + if ((adev->gfx.pfp_fw_version >= > > > > + 0x00b1) && > > > > (adev->gfx.pfp_feature_version >= 42)) > > > > + fw_version_ok = true; > > > > + } else { > > > > + if ((adev->gfx.mec_fw_version >= 0x0193) && > > > > + (adev- > > > > >gfx.mec_feature_version >= 42)) > > > > + fw_version_ok = true; > > > > + } > > > > > > Maybe we can add a flag and set the flag when request_firmware. > > > > I was going to suggest the same thing. > > > > > > > > > > > + > > > > + fw_version_ok = (adev->asic_type == CHIP_VEGA10) ? > > > > + fw_version_ok > > > > Also is this specific to vega10 or do all gfx9 parts have this fix? > > Please verify. > > > > Alex > > The patch can work on Raven. > Raven firmware version as : > MC feature version: 0, firmware version: 0x ME feature version: 42, > firmware version: 0x009c PFP feature version: 42,