Re: [PATCH 1/5] drm/amdkfd: Improve doorbell variable names
On Tue, Feb 5, 2019 at 4:43 PM Kuehling, Felix wrote: > > The headline should start with "drm/amdgpu". This change is not KFD > specific. > > I think Alex should review this change. If we are going to change it, let's be consistent for all IPs (e.g., gfx, vcn, etc.). Alex > > Regards, >Felix > > On 2019-02-05 3:31 p.m., Zhao, Yong wrote: > > Indicate that the doorbell offset and range is in dwords. > > > > Change-Id: Ib0f2564ffa7b1940ffb8725cdc03f662184f5436 > > Signed-off-by: Yong Zhao > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 3 ++- > > drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 6 +++--- > > drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 6 +++--- > > drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 6 +++--- > > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 ++- > > drivers/gpu/drm/amd/amdgpu/soc15.c | 2 +- > > drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 6 +- > > drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 6 +- > > 9 files changed, 25 insertions(+), 15 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > index d67f8b1dfe80..6230425f3f3d 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > @@ -642,7 +642,7 @@ struct amdgpu_nbio_funcs { > > void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring > > *ring); > > u32 (*get_memsize)(struct amdgpu_device *adev); > > void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, > > - bool use_doorbell, int doorbell_index, int > > doorbell_size); > > + bool use_doorbell, int index_in_dw, int > > range_dw_size); > > void (*enable_doorbell_aperture)(struct amdgpu_device *adev, > >bool enable); > > void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h > > index 1cfec06f81d4..5c8d04c353d0 100644 > > --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h > > @@ -39,6 +39,7 @@ struct amdgpu_doorbell { > >* can be 64-bit, so the index defined is in qword. > >*/ > > struct amdgpu_doorbell_index { > > + uint32_t entry_dw_size; > > uint32_t kiq; > > uint32_t mec_ring0; > > uint32_t mec_ring1; > > @@ -73,7 +74,7 @@ struct amdgpu_doorbell_index { > > }; > > uint32_t max_assignment; > > /* Per engine SDMA doorbell size in dword */ > > - uint32_t sdma_doorbell_range; > > + uint32_t dw_range_per_sdma_eng; > > }; > > > > typedef enum _AMDGPU_DOORBELL_ASSIGNMENT > > diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c > > b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c > > index cc967dbfd631..64bc41afd71e 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c > > +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c > > @@ -68,7 +68,7 @@ static u32 nbio_v6_1_get_memsize(struct amdgpu_device > > *adev) > > } > > > > static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int > > instance, > > - bool use_doorbell, int doorbell_index, int > > doorbell_size) > > + bool use_doorbell, int index_in_dw, int range_dw_size) > > { > > u32 reg = instance == 0 ? 
SOC15_REG_OFFSET(NBIO, 0, > > mmBIF_SDMA0_DOORBELL_RANGE) : > > SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); > > @@ -76,8 +76,8 @@ static void nbio_v6_1_sdma_doorbell_range(struct > > amdgpu_device *adev, int instan > > u32 doorbell_range = RREG32(reg); > > > > if (use_doorbell) { > > - doorbell_range = REG_SET_FIELD(doorbell_range, > > BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); > > - doorbell_range = REG_SET_FIELD(doorbell_range, > > BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size); > > + doorbell_range = REG_SET_FIELD(doorbell_range, > > BIF_SDMA0_DOORBELL_RANGE, OFFSET, index_in_dw); > > + doorbell_range = REG_SET_FIELD(doorbell_range, > > BIF_SDMA0_DOORBELL_RANGE, SIZE, range_dw_size); > > } else > > doorbell_range = REG_SET_FIELD(doorbell_range, > > BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c > > b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c > > index 1cdb98ad2db3..28cc96b7a292 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c > > @@ -67,7 +67,7 @@ static u32 nbio_v7_0_get_memsize(struct amdgpu_device > > *adev) > > } > > > > static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int > > instance, > > - bool use_doorbell, int doorbell_index, int > > doorbell_size) > > + bool
Re: [PATCH v2 1/2] drm/amdgpu: Fix pci platform speed and width
On Tue, Feb 5, 2019 at 4:38 PM Kasiviswanathan, Harish wrote: > > The new Vega series GPU cards have in-built bridges. To get the pcie > speed and width supported by the platform walk the hierarchy and get the > slowest link. > > Change-Id: I3196d158b0c614cbb5d7a34c793a58cb95322d32 > Signed-off-by: Harish Kasiviswanathan > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 > +++--- > 1 file changed, 46 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index d7dddb9..fcab1fe 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -3618,6 +3618,38 @@ int amdgpu_device_gpu_recover(struct amdgpu_device > *adev, > return r; > } > > +static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, > + enum pci_bus_speed *speed, > + enum pcie_link_width *width) > +{ > + struct pci_dev *pdev = adev->pdev; > + enum pci_bus_speed cur_speed; > + enum pcie_link_width cur_width; > + > + *speed = PCI_SPEED_UNKNOWN; > + *width = PCIE_LNK_WIDTH_UNKNOWN; > + > + while (pdev) { > + cur_speed = pcie_get_speed_cap(pdev); > + cur_width = pcie_get_width_cap(pdev); > + > + if (cur_speed != PCI_SPEED_UNKNOWN) { > + if (*speed == PCI_SPEED_UNKNOWN) > + *speed = cur_speed; > + else if (cur_speed < *speed) > + *speed = cur_speed; > + } > + > + if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) { > + if (*width == PCIE_LNK_WIDTH_UNKNOWN) > + *width = cur_width; > + else if (cur_width < *width) > + *width = cur_width; > + } > + pdev = pci_upstream_bridge(pdev); > + } > +} > + > /** > * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot > * > @@ -3630,8 +3662,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device > *adev, > static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) > { > struct pci_dev *pdev; > - enum pci_bus_speed speed_cap; > - enum pcie_link_width link_width; > + enum pci_bus_speed speed_cap, 
platform_speed_cap; > + enum pcie_link_width platform_link_width; > > if (amdgpu_pcie_gen_cap) > adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; > @@ -3648,6 +3680,12 @@ static void amdgpu_device_get_pcie_info(struct > amdgpu_device *adev) > return; > } > > + if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) > + return; I think you can drop this check, but I guess it doesn't hurt. Either way, series is: Reviewed-by: Alex Deucher > + > + amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap, > + &platform_link_width); > + > if (adev->pm.pcie_gen_mask == 0) { > /* asic caps */ > pdev = adev->pdev; > @@ -3673,22 +3711,20 @@ static void amdgpu_device_get_pcie_info(struct > amdgpu_device *adev) > adev->pm.pcie_gen_mask |= > CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; > } > /* platform caps */ > - pdev = adev->ddev->pdev->bus->self; > - speed_cap = pcie_get_speed_cap(pdev); > - if (speed_cap == PCI_SPEED_UNKNOWN) { > + if (platform_speed_cap == PCI_SPEED_UNKNOWN) { > adev->pm.pcie_gen_mask |= > (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | > > CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); > } else { > - if (speed_cap == PCIE_SPEED_16_0GT) > + if (platform_speed_cap == PCIE_SPEED_16_0GT) > adev->pm.pcie_gen_mask |= > (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | > > CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | > > CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | > > CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); > - else if (speed_cap == PCIE_SPEED_8_0GT) > + else if (platform_speed_cap == PCIE_SPEED_8_0GT) > adev->pm.pcie_gen_mask |= > (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | > > CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | > > CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); > - else if (speed_cap ==
Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
On 2019-02-05 11:00 a.m., Koenig, Christian wrote: > Ah! The initial clear of the PDs is syncing to everything! Right. Using amdgpu_sync_resv(... AMDGPU_FENCE_OWNER_VM ...) in amdgpu_vm_clear_bo seems to help. But if I also change the amdgpu_job_submit to use AMDGPU_FENCE_OWNER_VM, I get a VM fault and CP hang very early in my test. Not sure what's happening there. This is what I changed: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8b240f9..06c28ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -826,17 +826,18 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, WARN_ON(job->ibs[0].length_dw > 64); r = amdgpu_sync_resv(adev, >sync, bo->tbo.resv, -AMDGPU_FENCE_OWNER_UNDEFINED, false); +AMDGPU_FENCE_OWNER_VM, false); if (r) goto error_free; - r = amdgpu_job_submit(job, >entity, AMDGPU_FENCE_OWNER_UNDEFINED, + r = amdgpu_job_submit(job, >entity, AMDGPU_FENCE_OWNER_VM, ); if (r) goto error_free; amdgpu_bo_fence(bo, fence, true); - dma_fence_put(fence); + dma_fence_put(vm->last_update); + vm->last_update = fence; if (bo->shadow) return amdgpu_vm_clear_bo(adev, vm, bo->shadow, Basically I tried to do the synchronization exactly like amdgpu_vm_update_directories. The only way this clear could cause a VM fault is, if a subsequent PTE/PDS update happens too early and gets overwritten by the pending clear. But don't the clear and the next PTE/PDE update go to the same queue (vm->entity) and are implicitly synchronized? Answer after another hour of pouring over code and history: No, sched entities are no longer equivalent to queues. The scheduler can itself load-balance VM updates using multiple SDMA queues. Sigh. OK, so page table updates to different PTEs don't usually need to synchronize with each other. But how do page table updates to the same address get synchronized? 
How do you guarantee that two updates of the same PTE are processed by the hardware in the correct order, if AMDGPU_FENCE_OWNER_VM fences don't synchronize with each other? > > Ok, that is easy to fix. Not that easy. See above. ;) Regards, Felix > > Christian. > > Am 05.02.19 um 16:49 schrieb Kuehling, Felix: >> I saw a backtrace from the GPU scheduler when I was debugging this >> yesterday, but those backtraces never tell us where the command submission >> came from. It could be memory initialization, or some migration at >> buffer-validation. Basically, any command submission triggered by page table >> allocation that synchronizes with the PD reservation object is a problem. >> >> Regards, >> Felix >> >> -Original Message- >> From: Christian König >> Sent: Tuesday, February 05, 2019 10:40 AM >> To: Kuehling, Felix ; Koenig, Christian >> ; amd-gfx@lists.freedesktop.org >> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand >> >> Am 05.02.19 um 16:20 schrieb Kuehling, Felix: > This may cause regressions in KFD if PT BO allocation can trigger > eviction fences. I don't think that can happen, but need to double check as well. Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense. >>> Eviction fences don't cause actual memory evictions. They are the result of >>> memory evictions. They cause KFD processes to be preempted in order to >>> allow the fence to signal so memory can be evicted. The problem is that a >>> page table allocation waits for the fences on the PD reservation, which >>> looks like an eviction to KFD and triggers an unnecessary preemption. There >>> is no actual memory eviction happening here. >> Yeah, but where is that actually coming from? >> >> It sounds like we still unnecessary synchronize page table updates somewhere. >> >> Do you have a call chain? >> >> Regards, >> Christian. 
>> >>> Regards, >>> Felix >>> >>> -Original Message- >>> From: Christian König >>> Sent: Tuesday, February 05, 2019 6:37 AM >>> To: Kuehling, Felix ; >>> amd-gfx@lists.freedesktop.org >>> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand >>> This may cause regressions in KFD if PT BO allocation can trigger eviction fences. >>> I don't think that can happen, but need to double check as well. >>> >>> Otherwise allocating page tables could try to evict other page tables from >>> the same process and that seriously doesn't make much sense. >>> Do you know whether PT BO allocation would wait on the page-directory reservation object without the sync-object API anywhere? >>> For inside the kernel I can't think of any relevant use case, except for >>> the eviction call path and there it is
[pull] amdgpu drm-fixes-5.0
Hi Dave, Daniel, Fixes for 5.0: - Fix missing freesync properties on eDP - Fix locking in pasid mgr - Fix clang warning in kfd - DC/powerplay fix - Fix reported rev ids on raven - Doorbell fix for vega20 The following changes since commit 6e11ea9de9576a644045ffdc2067c09bc2012eda: drm/amdgpu: Transfer fences to dmabuf importer (2019-01-30 12:52:44 -0500) are available in the Git repository at: git://people.freedesktop.org/~agd5f/linux drm-fixes-5.0 for you to fetch changes up to 7fad8da1ae23171de0ea3cdb2c620f71eb2ab983: drm/amd/display: Attach VRR properties for eDP connectors (2019-02-05 18:10:28 -0500) Huang Rui (1): drm/amdgpu: fix the incorrect external id for raven series Jay Cornwall (1): drm/amdgpu: Implement doorbell self-ring for NBIO 7.4 Nathan Chancellor (1): drm/amdkfd: Fix if preprocessor statement above kfd_fill_iolink_info_for_cpu Nicholas Kazlauskas (1): drm/amd/display: Attach VRR properties for eDP connectors Philip Yang (1): drm/amdgpu: use spin_lock_irqsave to protect vm_manager.pasid_idr Roman Li (1): drm/amd/display: Fix fclk idle state drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 5 +++-- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c| 13 + drivers/gpu/drm/amd/amdgpu/soc15.c| 6 -- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c | 10 +- 6 files changed, 32 insertions(+), 7 deletions(-) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v7
Use HMM helper function hmm_vma_fault() to get physical pages backing userptr and start CPU page table update track of those pages. Then use hmm_vma_range_done() to check if those pages are updated before amdgpu_cs_submit for gfx or before user queues are resumed for kfd. If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart from scratch, for kfd, restore worker is rescheduled to retry. HMM simplify the CPU page table concurrent update check, so remove guptasklock, mmu_invalidations, last_set_pages fields from amdgpu_ttm_tt struct. HMM does not pin the page (increase page ref count), so remove related operations like release_pages(), put_page(), mark_page_dirty(). Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 158 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 25 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h| 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 178 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +- 9 files changed, 198 insertions(+), 282 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 0b31a1859023..0e1711a75b68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -61,7 +61,6 @@ struct kgd_mem { atomic_t invalid; struct amdkfd_process_info *process_info; - struct page **user_pages; struct amdgpu_sync sync; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d7b10d79f1de..ae2d838d31ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct 
mm_struct *mm, goto out; } - /* If no restore worker is running concurrently, user_pages -* should not be allocated -*/ - WARN(mem->user_pages, "Leaking user_pages array"); - - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", __func__); - ret = -ENOMEM; - goto unregister_out; - } - - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); - goto free_out; + goto unregister_out; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); - ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - if (ret) - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: - kvfree(mem->user_pages); - mem->user_pages = NULL; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = >tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(>kfd_bo.tv.head, >list); amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]); @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = >tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(>kfd_bo.tv.head, >list); i = 0; @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(_list_entry->head); mutex_unlock(_info->lock); - /* Free user pages if necessary */ - if (mem->user_pages) { - pr_debug("%s: Freeing user_pages array\n", __func__); - if (mem->user_pages[0]) - release_pages(mem->user_pages, - 
mem->bo->tbo.ttm->num_pages); - kvfree(mem->user_pages); - } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, ); if (unlikely(ret)) return ret; @@ -1855,25 +1824,11 @@ static int update_invalid_user_pages(struct
Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6
Hi Christian, I will submit new patch for review, my comments embedded inline below. Thanks, Philip On 2019-02-05 1:09 p.m., Koenig, Christian wrote: > Am 05.02.19 um 18:25 schrieb Yang, Philip: >> [SNIP]+ + if (r == -ERESTARTSYS) { + if (!--tries) { + DRM_ERROR("Possible deadlock? Retry too many times\n"); + return -EDEADLK; + } + goto restart; >>> You really need to restart the IOCTL to potentially handle signals. >>> >>> But since the calling code correctly handles this already you can just >>> drop this change as far as I can see. >>> >> I agree that we should return -ERESTARTSYS to upper layer to handle signals. >> >> But I do not find upper layers handle -ERESTARTSYS in the entire calling >> path, ksys_ioctl -> do_vfs_ioctl -> amdgpu_drm_ioctl -> drm->ioctl -> >> drm_ioctl_kernel -> amdgpu_cs_ioctl. The error code returns to >> application. I confirm it, libdrm userptr test application calling >> amdgpu_cs_ioctl return code is -512, which is -ERESTARTSYS. >> >> So application should handle -ERESTARTSYS to restart the ioctl, but >> libdrm userptr test application doesn't handle this. This causes the >> test failed. > > This is a bug in the test cases then. > > -ERESTARTSYS can happen at any time during interruptible waiting and it > is mandatory for the upper layer to handle it correctly. > -ERESTARTSYS can be returned only when signal is pending, signal handler will translate ERESTARTSYS to EINTR, drmIoctl in libdrm does handle EINTR and restart the ioctl. The test cases are ok. Driver fail path should not return ERESTARTSYS to user space. The new patch, I change amdgpu_cs_submit to return -EAGAIN if userptr is updated, and amdgpu_cs_ioctl redo the ioctl only if error code is -EAGAIN. ERESTARTSYS error code returns to user space for signal handle as before. >> >> Below are details of userptr path difference. For the previous path, >> libdrm test always goes to step 2, step 3 never trigger. So it never >> return -ERESTARTSYS, but in theory, this could happen. 
>> >> For HMM path, the test always goes to step 3, we have to handle this >> case inside amdgpu_cs_ioctl. Maybe I can change amdgpu_cs_submit to >> return -EBUSY, then restart the ioctl inside amdgpu_cs_ioctl. I will >> submit new patch. > > Clearly a NAK, this won't work correctly. > I don't understand your concern, may you explain the reason? > Christian. > >> >> The previous userptr path: >> 1. gem_userptr_ioctl to register userptr >> 2. amdgpu_cs_parser_bos, check if userptr is invalidated, then update >> userptr >> 3. amdgpu_cs_submit, hold p->mn lock, check if userptr is invalidated, >> return -ERESTARTSYS >> >> The new HMM userptr path: >> 1. gem_userptr_ioctl to register userptr >> 2. amdgpu_cs_parser_bos, start HMM to track userptr update >> 3. amdgpu_cs_submit, hold p->mn lock, check HMM if userptr is >> invalidated, return -ERESTARTSYS >> >> + } + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d21dd2f369da..555285e329ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -329,26 +329,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, r = amdgpu_bo_reserve(bo, true); if (r) - goto free_pages; + goto user_pages_done; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(>tbo, >placement, ); amdgpu_bo_unreserve(bo); if (r) - goto free_pages; + goto user_pages_done; } r = drm_gem_handle_create(filp, gobj, ); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put_unlocked(gobj); if (r) - return r; + goto user_pages_done; args->handle = handle; - return 0; -free_pages: - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); +user_pages_done: + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); release_object: drm_gem_object_put_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index e356867d2308..fa2516016c43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -222,8 +222,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, true, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); - - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); }
Re: [PATCH 1/5] drm/amdkfd: Improve doorbell variable names
The headline should start with "drm/amdgpu". This change is not KFD specific. I think Alex should review this change. Regards, Felix On 2019-02-05 3:31 p.m., Zhao, Yong wrote: > Indicate that the doorbell offset and range is in dwords. > > Change-Id: Ib0f2564ffa7b1940ffb8725cdc03f662184f5436 > Signed-off-by: Yong Zhao > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 3 ++- > drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 6 +++--- > drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 6 +++--- > drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 6 +++--- > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 ++- > drivers/gpu/drm/amd/amdgpu/soc15.c | 2 +- > drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 6 +- > drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 6 +- > 9 files changed, 25 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index d67f8b1dfe80..6230425f3f3d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -642,7 +642,7 @@ struct amdgpu_nbio_funcs { > void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); > u32 (*get_memsize)(struct amdgpu_device *adev); > void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, > - bool use_doorbell, int doorbell_index, int > doorbell_size); > + bool use_doorbell, int index_in_dw, int range_dw_size); > void (*enable_doorbell_aperture)(struct amdgpu_device *adev, >bool enable); > void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h > index 1cfec06f81d4..5c8d04c353d0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h > @@ -39,6 +39,7 @@ struct amdgpu_doorbell { >* can be 64-bit, so the index defined is in qword. 
>*/ > struct amdgpu_doorbell_index { > + uint32_t entry_dw_size; > uint32_t kiq; > uint32_t mec_ring0; > uint32_t mec_ring1; > @@ -73,7 +74,7 @@ struct amdgpu_doorbell_index { > }; > uint32_t max_assignment; > /* Per engine SDMA doorbell size in dword */ > - uint32_t sdma_doorbell_range; > + uint32_t dw_range_per_sdma_eng; > }; > > typedef enum _AMDGPU_DOORBELL_ASSIGNMENT > diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c > b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c > index cc967dbfd631..64bc41afd71e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c > +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c > @@ -68,7 +68,7 @@ static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) > } > > static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int > instance, > - bool use_doorbell, int doorbell_index, int > doorbell_size) > + bool use_doorbell, int index_in_dw, int range_dw_size) > { > u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, > mmBIF_SDMA0_DOORBELL_RANGE) : > SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); > @@ -76,8 +76,8 @@ static void nbio_v6_1_sdma_doorbell_range(struct > amdgpu_device *adev, int instan > u32 doorbell_range = RREG32(reg); > > if (use_doorbell) { > - doorbell_range = REG_SET_FIELD(doorbell_range, > BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); > - doorbell_range = REG_SET_FIELD(doorbell_range, > BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size); > + doorbell_range = REG_SET_FIELD(doorbell_range, > BIF_SDMA0_DOORBELL_RANGE, OFFSET, index_in_dw); > + doorbell_range = REG_SET_FIELD(doorbell_range, > BIF_SDMA0_DOORBELL_RANGE, SIZE, range_dw_size); > } else > doorbell_range = REG_SET_FIELD(doorbell_range, > BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); > > diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c > b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c > index 1cdb98ad2db3..28cc96b7a292 100644 > --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c > @@ -67,7 +67,7 @@ static u32 
nbio_v7_0_get_memsize(struct amdgpu_device *adev) > } > > static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int > instance, > - bool use_doorbell, int doorbell_index, int > doorbell_size) > + bool use_doorbell, int index_in_dw, int range_dw_size) > { > u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, > mmBIF_SDMA0_DOORBELL_RANGE) : > SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); > @@ -75,8 +75,8 @@ static void nbio_v7_0_sdma_doorbell_range(struct > amdgpu_device *adev, int instan > u32 doorbell_range = RREG32(reg); > >
[PATCH v2 1/2] drm/amdgpu: Fix pci platform speed and width
The new Vega series GPU cards have in-built bridges. To get the pcie speed and width supported by the platform walk the hierarchy and get the slowest link. Change-Id: I3196d158b0c614cbb5d7a34c793a58cb95322d32 Signed-off-by: Harish Kasiviswanathan --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 +++--- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d7dddb9..fcab1fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3618,6 +3618,38 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, return r; } +static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, + enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + struct pci_dev *pdev = adev->pdev; + enum pci_bus_speed cur_speed; + enum pcie_link_width cur_width; + + *speed = PCI_SPEED_UNKNOWN; + *width = PCIE_LNK_WIDTH_UNKNOWN; + + while (pdev) { + cur_speed = pcie_get_speed_cap(pdev); + cur_width = pcie_get_width_cap(pdev); + + if (cur_speed != PCI_SPEED_UNKNOWN) { + if (*speed == PCI_SPEED_UNKNOWN) + *speed = cur_speed; + else if (cur_speed < *speed) + *speed = cur_speed; + } + + if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) { + if (*width == PCIE_LNK_WIDTH_UNKNOWN) + *width = cur_width; + else if (cur_width < *width) + *width = cur_width; + } + pdev = pci_upstream_bridge(pdev); + } +} + /** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * @@ -3630,8 +3662,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { struct pci_dev *pdev; - enum pci_bus_speed speed_cap; - enum pcie_link_width link_width; + enum pci_bus_speed speed_cap, platform_speed_cap; + enum pcie_link_width platform_link_width; if (amdgpu_pcie_gen_cap) adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; @@ -3648,6 +3680,12 @@ static void 
amdgpu_device_get_pcie_info(struct amdgpu_device *adev) return; } + if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) + return; + + amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap, + &platform_link_width); + if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ pdev = adev->pdev; @@ -3673,22 +3711,20 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; } /* platform caps */ - pdev = adev->ddev->pdev->bus->self; - speed_cap = pcie_get_speed_cap(pdev); - if (speed_cap == PCI_SPEED_UNKNOWN) { + if (platform_speed_cap == PCI_SPEED_UNKNOWN) { adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); } else { - if (speed_cap == PCIE_SPEED_16_0GT) + if (platform_speed_cap == PCIE_SPEED_16_0GT) adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); - else if (speed_cap == PCIE_SPEED_8_0GT) + else if (platform_speed_cap == PCIE_SPEED_8_0GT) adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); - else if (speed_cap == PCIE_SPEED_5_0GT) + else if (platform_speed_cap == PCIE_SPEED_5_0GT) adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); else @@ -3697,12 +3733,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
[PATCH v2 2/2] drm/amd/powerplay: add override pcie parameters for Vega20 (v2)
v2: Fix SMU message format Send override message after SMU enable features Change-Id: Ib880c83bc7aa12be370cf6619acfe66e12664c9c Signed-off-by: Harish Kasiviswanathan --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 45 +- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index da022ca..e1b1656 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -771,40 +771,49 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr) return 0; } +/* + * Override PCIe link speed and link width for DPM Level 1. PPTable entries + * reflect the ASIC capabilities and not the system capabilities. For e.g. + * Vega20 board in a PCI Gen3 system. In this case, when SMU's tries to switch + * to DPM1, it fails as system doesn't support Gen4. + */ static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); - uint32_t pcie_speed = 0, pcie_width = 0, pcie_arg; + uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg; int ret; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) - pcie_speed = 16; + pcie_gen = 3; else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) - pcie_speed = 8; + pcie_gen = 2; else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) - pcie_speed = 5; + pcie_gen = 1; else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) - pcie_speed = 2; + pcie_gen = 0; if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32) - pcie_width = 32; + pcie_width = 7; else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) - pcie_width = 16; + pcie_width = 6; else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) - pcie_width = 12; + pcie_width = 5; else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) - pcie_width = 8; - else if 
(adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 3; else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) pcie_width = 2; else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) pcie_width = 1; - pcie_arg = pcie_width | (pcie_speed << 8); - + /* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1 +* Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 +* Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 +*/ + smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width; ret = smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_OverridePcieParameters, pcie_arg); + PPSMC_MSG_OverridePcieParameters, smu_pcie_arg); PP_ASSERT_WITH_CODE(!ret, "[OverridePcieParameters] Attempt to override pcie params failed!", return ret); @@ -1611,11 +1620,6 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "[EnableDPMTasks] Failed to initialize SMC table!", return result); - result = vega20_override_pcie_parameters(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "[EnableDPMTasks] Failed to override pcie parameters!", - return result); - result = vega20_run_btc(hwmgr); PP_ASSERT_WITH_CODE(!result, "[EnableDPMTasks] Failed to run btc!", @@ -1631,6 +1635,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "[EnableDPMTasks] Failed to enable all smu features!", return result); + result = vega20_override_pcie_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "[EnableDPMTasks] Failed to override pcie parameters!", + return result); + result = vega20_notify_smc_display_change(hwmgr); PP_ASSERT_WITH_CODE(!result, "[EnableDPMTasks] Failed to notify smc display change!", -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/5] drm/amdkfd: Fix bugs regarding CP user queue doorbells mask on SOC15
Reserved doorbells for SDMA, IH and VCN were not properly masked out when allocating doorbells for CP user queues. This patch fixes that. Change-Id: I670adfc3fd7725d2ed0bd9665cb7f69f8b9023c2 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 17 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 4 drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 3 +++ drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 21 ++- .../gpu/drm/amd/include/kgd_kfd_interface.h | 19 ++--- 7 files changed, 54 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index e957e42c539a..ee8527701731 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -196,11 +196,20 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) gpu_resources.sdma_doorbell[1][i+1] = adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1); } - /* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for -* SDMA, IH and VCN. So don't use them for the CP. + + /* Because of the setting in registers like +* SDMA0_DOORBELL_RANGE etc., BIF statically uses the +* lower 12 bits of doorbell address for routing. In +* order to route the CP queue doorbells to CP engine, +* the doorbells allocated to CP queues have to be +* outside the range set for SDMA, VCN, and IH blocks +* Prior to SOC15, all queues use queue ID to +* determine doorbells. 
*/ - gpu_resources.reserved_doorbell_mask = 0x1e0; - gpu_resources.reserved_doorbell_val = 0x0e0; + gpu_resources.reserved_doorbells_start = + adev->doorbell_index.sdma_engine[0]; + gpu_resources.reserved_doorbells_end = + adev->doorbell_index.last_non_cp; kgd2kfd_device_init(adev->kfd.dev, _resources); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 59c41841cbce..74b8e2bfabd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -70,6 +70,7 @@ struct amdgpu_doorbell_index { uint32_t vce_ring6_7; } uvd_vce; }; + uint32_t last_non_cp; uint32_t max_assignment; uint32_t last_idx; /* Per engine SDMA doorbell size in dword */ @@ -141,6 +142,7 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D, AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E, AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F, + AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7, AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT= 0x18F, AMDGPU_VEGA20_DOORBELL_INVALID = 0x } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT; @@ -216,6 +218,8 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE, AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF, + AMDGPU_DOORBELL64_LAST_NON_CP = AMDGPU_DOORBELL64_VCE_RING6_7, + AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, AMDGPU_DOORBELL64_INVALID = 0x } AMDGPU_DOORBELL64_ASSIGNMENT; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index 65214c7b0b20..76166c0ec509 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -80,6 +80,9 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = 
AMDGPU_DOORBELL64_VCE_RING6_7; + + adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP; + /* In unit of dword doorbell */ adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT << 1; adev->doorbell_index.dw_range_per_sdma_eng = diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index a388d306391a..10df2fed5a99 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -84,6 +84,9 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)
[PATCH 5/5] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources
We can directly calculate the sdma doorbell index in the process doorbell pages through the doorbell_index structure in amdgpu_device, so no need to cache them in kgd2kfd_shared_resources any more, resulting in more portable code. Change-Id: Ic657799856ed0256f36b01e502ef0cab263b1f49 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 55 ++- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 -- .../gpu/drm/amd/include/kgd_kfd_interface.h | 2 +- 3 files changed, 31 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index ee8527701731..e62c3703169a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { - int i, n; + int i; int last_valid_bit; if (adev->kfd.dev) { @@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .gpuvm_size = min(adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT, AMDGPU_GMC_HOLE_START), - .drm_render_minor = adev->ddev->render->index + .drm_render_minor = adev->ddev->render->index, + .sdma_doorbell_idx = adev->doorbell_index.sdma_engine, + }; /* this is going to have a few of the MSBs set that we need to @@ -172,45 +174,22 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) _resources.doorbell_aperture_size, _resources.doorbell_start_offset); - if (adev->asic_type < CHIP_VEGA10) { - kgd2kfd_device_init(adev->kfd.dev, _resources); - return; - } - - n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8; - - for (i = 0; i < n; i += 2) { - /* On SOC15 the BIF is involved in routing -* doorbells using the low 12 bits of the -* address. Communicate the assignments to -* KFD. KFD uses two doorbell pages per -* process in case of 64-bit doorbells so we -* can use each doorbell assignment twice. 
+ if (adev->asic_type >= CHIP_VEGA10) { + /* Because of the setting in registers like +* SDMA0_DOORBELL_RANGE etc., BIF statically uses the +* lower 12 bits of doorbell address for routing. In +* order to route the CP queue doorbells to CP engine, +* the doorbells allocated to CP queues have to be +* outside the range set for SDMA, VCN, and IH blocks +* Prior to SOC15, all queues use queue ID to +* determine doorbells. */ - gpu_resources.sdma_doorbell[0][i] = - adev->doorbell_index.sdma_engine[0] + (i >> 1); - gpu_resources.sdma_doorbell[0][i+1] = - adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1); - gpu_resources.sdma_doorbell[1][i] = - adev->doorbell_index.sdma_engine[1] + (i >> 1); - gpu_resources.sdma_doorbell[1][i+1] = - adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1); + gpu_resources.reserved_doorbells_start = + adev->doorbell_index.sdma_engine[0]; + gpu_resources.reserved_doorbells_end = + adev->doorbell_index.last_non_cp; } - /* Because of the setting in registers like -* SDMA0_DOORBELL_RANGE etc., BIF statically uses the -* lower 12 bits of doorbell address for routing. In -* order to route the CP queue doorbells to CP engine, -* the doorbells allocated to CP queues have to be -* outside the range set for SDMA, VCN, and IH blocks -* Prior to SOC15, all queues use queue ID to -* determine doorbells. -*/ - gpu_resources.reserved_doorbells_start = - adev->doorbell_index.sdma_engine[0]; - gpu_resources.reserved_doorbells_end = - adev->doorbell_index.last_non_cp; - kgd2kfd_device_init(adev->kfd.dev, _resources); } } diff --git
[PATCH 2/5] drm/amdgpu: Fix a bug in setting CP_MEC_DOORBELL_RANGE_UPPER on SOC15
Because CP can use all doorbells outside the ones reserved for SDMA, IH, and VCN, the value set to CP_MEC_DOORBELL_RANGE_UPPER should be the maximal index possible in a page. Change-Id: I402a56ce9a80e6c2ed2f96be431ae71ca88e73a4 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 2 +- drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 3 +++ drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 5c8d04c353d0..90eca63605ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -73,6 +73,7 @@ struct amdgpu_doorbell_index { } uvd_vce; }; uint32_t max_assignment; + uint32_t last_idx; /* Per engine SDMA doorbell size in dword */ uint32_t dw_range_per_sdma_eng; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 262ee3cf6f1c..0278e3ab6b94 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2998,7 +2998,7 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell_index.userqueue_end * 2) << 2); + (adev->doorbell_index.last_idx * 2) << 2); } WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index d2409df2dde9..9eb8c9209231 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -88,5 +88,8 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev) (adev->doorbell_index.sdma_engine[1] - adev->doorbell_index.sdma_engine[0]) * adev->doorbell_index.entry_dw_size; + + 
adev->doorbell_index.last_idx = PAGE_SIZE + / (sizeof(uint32_t) * adev->doorbell_index.entry_dw_size) - 1; } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index b28c5999d8f0..aa8c7699c689 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -91,5 +91,8 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev) (adev->doorbell_index.sdma_engine[1] - adev->doorbell_index.sdma_engine[0]) * adev->doorbell_index.entry_dw_size; + + adev->doorbell_index.last_idx = PAGE_SIZE + / (sizeof(uint32_t) * adev->doorbell_index.entry_dw_size) - 1; } -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/5] drm/amdgpu: Delete user queue doorbell variables
They are no longer used, so delete them to avoid confusion. Change-Id: I3cf23fe7110ff88f53c0c279b2b4ec8d1a53b87c Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 8 drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 2 -- drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 2 -- 3 files changed, 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 90eca63605ea..59c41841cbce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -49,8 +49,6 @@ struct amdgpu_doorbell_index { uint32_t mec_ring5; uint32_t mec_ring6; uint32_t mec_ring7; - uint32_t userqueue_start; - uint32_t userqueue_end; uint32_t gfx_ring0; uint32_t sdma_engine[8]; uint32_t ih; @@ -113,8 +111,6 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL_MEC_RING5 = 0x008, AMDGPU_VEGA20_DOORBELL_MEC_RING6 = 0x009, AMDGPU_VEGA20_DOORBELL_MEC_RING7 = 0x00A, - AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B, - AMDGPU_VEGA20_DOORBELL_USERQUEUE_END = 0x08A, AMDGPU_VEGA20_DOORBELL_GFX_RING0 = 0x08B, /* SDMA:256~335*/ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0= 0x100, @@ -178,10 +174,6 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_MEC_RING6 = 0x09, AMDGPU_DOORBELL64_MEC_RING7 = 0x0a, - /* User queue doorbell range (128 doorbells) */ - AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b, - AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a, - /* Graphics engine */ AMDGPU_DOORBELL64_GFX_RING0 = 0x8b, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index 9eb8c9209231..65214c7b0b20 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -68,8 +68,6 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.mec_ring5 = AMDGPU_DOORBELL64_MEC_RING5; adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL64_MEC_RING6; 
adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL64_MEC_RING7; - adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL64_USERQUEUE_START; - adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL64_USERQUEUE_END; adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL64_GFX_RING0; adev->doorbell_index.sdma_engine[0] = AMDGPU_DOORBELL64_sDMA_ENGINE0; adev->doorbell_index.sdma_engine[1] = AMDGPU_DOORBELL64_sDMA_ENGINE1; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index aa8c7699c689..a388d306391a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -66,8 +66,6 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.mec_ring5 = AMDGPU_VEGA20_DOORBELL_MEC_RING5; adev->doorbell_index.mec_ring6 = AMDGPU_VEGA20_DOORBELL_MEC_RING6; adev->doorbell_index.mec_ring7 = AMDGPU_VEGA20_DOORBELL_MEC_RING7; - adev->doorbell_index.userqueue_start = AMDGPU_VEGA20_DOORBELL_USERQUEUE_START; - adev->doorbell_index.userqueue_end = AMDGPU_VEGA20_DOORBELL_USERQUEUE_END; adev->doorbell_index.gfx_ring0 = AMDGPU_VEGA20_DOORBELL_GFX_RING0; adev->doorbell_index.sdma_engine[0] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0; adev->doorbell_index.sdma_engine[1] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/5] drm/amdkfd: Improve doorbell variable names
Indicate that the doorbell offset and range is in dwords. Change-Id: Ib0f2564ffa7b1940ffb8725cdc03f662184f5436 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 3 ++- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 +- drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 6 +- drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 6 +- 9 files changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d67f8b1dfe80..6230425f3f3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -642,7 +642,7 @@ struct amdgpu_nbio_funcs { void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); u32 (*get_memsize)(struct amdgpu_device *adev); void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index, int doorbell_size); + bool use_doorbell, int index_in_dw, int range_dw_size); void (*enable_doorbell_aperture)(struct amdgpu_device *adev, bool enable); void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 1cfec06f81d4..5c8d04c353d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -39,6 +39,7 @@ struct amdgpu_doorbell { * can be 64-bit, so the index defined is in qword. 
*/ struct amdgpu_doorbell_index { + uint32_t entry_dw_size; uint32_t kiq; uint32_t mec_ring0; uint32_t mec_ring1; @@ -73,7 +74,7 @@ struct amdgpu_doorbell_index { }; uint32_t max_assignment; /* Per engine SDMA doorbell size in dword */ - uint32_t sdma_doorbell_range; + uint32_t dw_range_per_sdma_eng; }; typedef enum _AMDGPU_DOORBELL_ASSIGNMENT diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index cc967dbfd631..64bc41afd71e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -68,7 +68,7 @@ static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) } static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index, int doorbell_size) + bool use_doorbell, int index_in_dw, int range_dw_size) { u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); @@ -76,8 +76,8 @@ static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instan u32 doorbell_range = RREG32(reg); if (use_doorbell) { - doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); - doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, index_in_dw); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range_dw_size); } else doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 1cdb98ad2db3..28cc96b7a292 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -67,7 +67,7 @@ static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) } static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, 
int instance, - bool use_doorbell, int doorbell_index, int doorbell_size) + bool use_doorbell, int index_in_dw, int range_dw_size) { u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); @@ -75,8 +75,8 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan u32 doorbell_range = RREG32(reg); if (use_doorbell) { - doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); - doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET,
Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6
Am 05.02.19 um 18:25 schrieb Yang, Philip: > [SNIP]+ >>> + if (r == -ERESTARTSYS) { >>> + if (!--tries) { >>> + DRM_ERROR("Possible deadlock? Retry too many times\n"); >>> + return -EDEADLK; >>> + } >>> + goto restart; >> You really need to restart the IOCTL to potentially handle signals. >> >> But since the calling code correctly handles this already you can just >> drop this change as far as I can see. >> > I agree that we should return -ERESTARTSYS to upper layer to handle signals. > > But I do not find upper layers handle -ERESTARTSYS in the entire calling > path, ksys_ioctl -> do_vfs_ioctl -> amdgpu_drm_ioctl -> drm->ioctl -> > drm_ioctl_kernel -> amdgpu_cs_ioctl. The error code returns to > application. I confirm it, libdrm userptr test application calling > amdgpu_cs_ioctl return code is -512, which is -ERESTARTSYS. > > So application should handle -ERESTARTSYS to restart the ioctl, but > libdrm userptr test application doesn't handle this. This causes the > test failed. This is a bug in the test cases then. -ERESTARTSYS can happen at any time during interruptible waiting and it is mandatory for the upper layer to handle it correctly. > > Below are details of userptr path difference. For the previous path, > libdrm test always goes to step 2, step 3 never trigger. So it never > return -ERESTARTSYS, but in theory, this could happen. > > For HMM path, the test always goes to step 3, we have to handle this > case inside amdgpu_cs_ioctl. Maybe I can change amdgpu_cs_submit to > return -EBUSY, then restart the ioctl inside amdgpu_cs_ioctl. I will > submit new patch. Clearly a NAK, this won't work correctly. Christian. > > The previous userptr path: > 1. gem_userptr_ioctl to register userptr > 2. amdgpu_cs_parser_bos, check if userptr is invalidated, then update > userptr > 3. amdgpu_cs_submit, hold p->mn lock, check if userptr is invalidated, > return -ERESTARTSYS > > The new HMM userptr path: > 1. gem_userptr_ioctl to register userptr > 2. 
amdgpu_cs_parser_bos, start HMM to track userptr update > 3. amdgpu_cs_submit, hold p->mn lock, check HMM if userptr is > invalidated, return -ERESTARTSYS > > >>> + } >>> + >>> return r; >>> } >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>> index d21dd2f369da..555285e329ed 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>> @@ -329,26 +329,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device >>> *dev, void *data, >>> r = amdgpu_bo_reserve(bo, true); >>> if (r) >>> - goto free_pages; >>> + goto user_pages_done; >>> amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); >>> r = ttm_bo_validate(>tbo, >placement, ); >>> amdgpu_bo_unreserve(bo); >>> if (r) >>> - goto free_pages; >>> + goto user_pages_done; >>> } >>> r = drm_gem_handle_create(filp, gobj, ); >>> - /* drop reference from allocate - handle holds it now */ >>> - drm_gem_object_put_unlocked(gobj); >>> if (r) >>> - return r; >>> + goto user_pages_done; >>> args->handle = handle; >>> - return 0; >>> -free_pages: >>> - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); >>> +user_pages_done: >>> + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) >>> + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); >>> release_object: >>> drm_gem_object_put_unlocked(gobj); >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >>> index e356867d2308..fa2516016c43 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >>> @@ -222,8 +222,6 @@ static void amdgpu_mn_invalidate_node(struct >>> amdgpu_mn_node *node, >>> true, false, MAX_SCHEDULE_TIMEOUT); >>> if (r <= 0) >>> DRM_ERROR("(%ld) failed to wait for user bo\n", r); >>> - >>> - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); >>> } >>> } >>> @@ -504,3 +502,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) >>> mutex_unlock(>mn_lock); >>> } >>> +/* flags 
used by HMM internal, not related to CPU/GPU PTE flags */ >>> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { >>> + (1 << 0), /* HMM_PFN_VALID */ >>> + (1 << 1), /* HMM_PFN_WRITE */ >>> + 0 /* HMM_PFN_DEVICE_PRIVATE */ >>> +}; >>> + >>> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { >>> + 0xfffeUL, /* HMM_PFN_ERROR */ >>> + 0, /* HMM_PFN_NONE */ >>> + 0xfffcUL /* HMM_PFN_SPECIAL */ >>> +}; >>> + >>> +void amdgpu_hmm_init_range(struct hmm_range *range) >>> +{ >>> + if (range) { >>> + range->flags = hmm_range_flags; >>> + range->values = hmm_range_values; >>> + range->pfn_shift =
Re: [Intel-gfx] [PATCH 2/6] drm/drv: Prepare to remove drm_dev_unplug()
Den 05.02.2019 17.31, skrev Daniel Vetter: > On Tue, Feb 05, 2019 at 11:20:55AM +0100, Noralf Trønnes wrote: >> >> >> Den 05.02.2019 10.11, skrev Daniel Vetter: >>> On Mon, Feb 04, 2019 at 06:35:28PM +0100, Noralf Trønnes wrote: Den 04.02.2019 16.41, skrev Daniel Vetter: > On Sun, Feb 03, 2019 at 04:41:56PM +0100, Noralf Trønnes wrote: >> The only thing now that makes drm_dev_unplug() special is that it sets >> drm_device->unplugged. Move this code to drm_dev_unregister() so that we >> can remove drm_dev_unplug(). >> >> Signed-off-by: Noralf Trønnes >> --- [...] >> drivers/gpu/drm/drm_drv.c | 27 +++ >> include/drm/drm_drv.h | 10 -- >> 2 files changed, 19 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c >> index 05bbc2b622fc..e0941200edc6 100644 >> --- a/drivers/gpu/drm/drm_drv.c >> +++ b/drivers/gpu/drm/drm_drv.c >> @@ -366,15 +366,6 @@ EXPORT_SYMBOL(drm_dev_exit); >> */ >> void drm_dev_unplug(struct drm_device *dev) >> { >> -/* >> - * After synchronizing any critical read section is guaranteed >> to see >> - * the new value of ->unplugged, and any critical section which >> might >> - * still have seen the old value of ->unplugged is guaranteed >> to have >> - * finished. >> - */ >> -dev->unplugged = true; >> -synchronize_srcu(_unplug_srcu); >> - >> drm_dev_unregister(dev); >> drm_dev_put(dev); >> } >> @@ -832,11 +823,14 @@ EXPORT_SYMBOL(drm_dev_register); >> * drm_dev_register() but does not deallocate the device. The caller >> must call >> * drm_dev_put() to drop their final reference. >> * >> - * A special form of unregistering for hotpluggable devices is >> drm_dev_unplug(), >> - * which can be called while there are still open users of @dev. >> + * This function can be called while there are still open users of @dev >> as long >> + * as the driver protects its device resources using drm_dev_enter() and >> + * drm_dev_exit(). 
>> * >> * This should be called first in the device teardown code to make sure >> - * userspace can't access the device instance any more. >> + * userspace can't access the device instance any more. Drivers that >> support >> + * device unplug will probably want to call >> drm_atomic_helper_shutdown() first > > Read once more with a bit more coffee, spotted this: > > s/first/afterwards/ - shutting down the hw before we've taken it away from > userspace is kinda the wrong way round. It should be the inverse of driver > load, which is 1) allocate structures 2) prep hw 3) register driver with > the world (simplified ofc). > The problem is that drm_dev_unregister() sets the device as unplugged and if drm_atomic_helper_shutdown() is called afterwards it's not allowed to touch hardware. I know it's the wrong order, but the only way to do it in the right order is to have a separate function that sets unplugged: drm_dev_unregister(); drm_atomic_helper_shutdown(); drm_dev_set_unplugged(); >>> >>> Annoying ... but yeah calling _shutdown() before we stopped userspace is >>> also not going to work. Because userspace could quickly re-enable >>> something, and then the refcounts would be all wrong again and leaking >>> objects. >>> >> >> What happens with a USB device that is unplugged with open userspace, >> will that leak objects? > > Maybe we've jumped to conclusions. drm_atomic_helper_shutdown() will run > as normal, the only thing that should be skipped is actually touching the > hw (as long as the driver doesn't protect too much with > drm_dev_enter/exit). So all the software updates (including refcounting > updates) will still be done. Ofc current udl is not yet atomic, so in > reality something else happens. 
> > And we ofc still have the same issue that if you just unload the driver, > then the hw will stay on (which might really confuse the driver on next > load, when it assumes that it only gets loaded from cold boot where > everything is off - which usually is the case on an arm soc at least). > >>> I get a bit the feeling we're over-optimizing here with trying to devm-ize >>> drm_dev_register. Just getting drm_device correctly devm-ized is a big >>> step forward already, and will open up a lot of TODO items across a lot of >>> drivers. E.g. we could add a drm_dev_kzalloc, for allocating all the drm_* >>> structs, which gets released together with drm_device. I think that's a >>> much clearer path forward, I think we all agree that getting the kfree out >>> of the driver codes is a good thing, and it would allow us to do this >>> correctly. >>> >>>
Re: kernel BUG at drivers/gpu/drm//ttm/ttm_bo.c:196!
On 2019-02-05 4:45 p.m., Christian König wrote: > Possible, but unlikely. > > That sounds more like a reference counting problem where something is > killing the BO while it is still on the LRU. FWIW, I've hit this twice now today, whereas I don't remember ever hitting it before (not 100% sure though). I reverted the remaining hunk of the "cleanup setting bulk_movable" change, and it survived a piglit run. Could just be luck, though... -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6
Hi Christian, My comments are embedded below. I will submit another patch to address those. Thanks, Philip On 2019-02-05 6:52 a.m., Christian König wrote: > Am 04.02.19 um 19:23 schrieb Yang, Philip: >> Use HMM helper function hmm_vma_fault() to get physical pages backing >> userptr and start CPU page table update track of those pages. Then use >> hmm_vma_range_done() to check if those pages are updated before >> amdgpu_cs_submit for gfx or before user queues are resumed for kfd. >> >> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart >> from scratch, for kfd, restore worker is rescheduled to retry. >> >> HMM simplify the CPU page table concurrent update check, so remove >> guptasklock, mmu_invalidations, last_set_pages fields from >> amdgpu_ttm_tt struct. >> >> HMM does not pin the page (increase page ref count), so remove related >> operations like release_pages(), put_page(), mark_page_dirty(). >> >> Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 >> Signed-off-by: Philip Yang >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 - >> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++--- >> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 158 +++- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 25 ++- >> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 173 -- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +- >> 9 files changed, 198 insertions(+), 277 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> index 0b31a1859023..0e1711a75b68 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> @@ -61,7 +61,6 @@ struct kgd_mem { >> atomic_t invalid; >> struct amdkfd_process_info *process_info; >> - struct page **user_pages; >> struct amdgpu_sync sync; >> diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> index d7b10d79f1de..ae2d838d31ea 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, >> struct mm_struct *mm, >> goto out; >> } >> - /* If no restore worker is running concurrently, user_pages >> - * should not be allocated >> - */ >> - WARN(mem->user_pages, "Leaking user_pages array"); >> - >> - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, >> - sizeof(struct page *), >> - GFP_KERNEL | __GFP_ZERO); >> - if (!mem->user_pages) { >> - pr_err("%s: Failed to allocate pages array\n", __func__); >> - ret = -ENOMEM; >> - goto unregister_out; >> - } >> - >> - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); >> + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); >> if (ret) { >> pr_err("%s: Failed to get user pages: %d\n", __func__, ret); >> - goto free_out; >> + goto unregister_out; >> } >> - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); >> - >> ret = amdgpu_bo_reserve(bo, true); >> if (ret) { >> pr_err("%s: Failed to reserve BO\n", __func__); >> @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, >> struct mm_struct *mm, >> amdgpu_bo_unreserve(bo); >> release_out: >> - if (ret) >> - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); >> -free_out: >> - kvfree(mem->user_pages); >> - mem->user_pages = NULL; >> + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); >> unregister_out: >> if (ret) >> amdgpu_mn_unregister(bo); >> @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, >> ctx->kfd_bo.priority = 0; >> ctx->kfd_bo.tv.bo = >tbo; >> ctx->kfd_bo.tv.num_shared = 1; >> - ctx->kfd_bo.user_pages = NULL; >> list_add(>kfd_bo.tv.head, >list); >> amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]); >> @@ -743,7 +722,6 @@ static int 
reserve_bo_and_cond_vms(struct kgd_mem >> *mem, >> ctx->kfd_bo.priority = 0; >> ctx->kfd_bo.tv.bo = >tbo; >> ctx->kfd_bo.tv.num_shared = 1; >> - ctx->kfd_bo.user_pages = NULL; >> list_add(>kfd_bo.tv.head, >list); >> i = 0; >> @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( >> list_del(_list_entry->head); >> mutex_unlock(_info->lock); >> - /* Free user pages if necessary */ >> - if (mem->user_pages) { >> - pr_debug("%s: Freeing user_pages array\n", __func__); >> - if (mem->user_pages[0]) >> - release_pages(mem->user_pages, >> -
Re: [Intel-gfx] [PATCH 2/6] drm/drv: Prepare to remove drm_dev_unplug()
On Tue, Feb 05, 2019 at 11:20:55AM +0100, Noralf Trønnes wrote: > > > Den 05.02.2019 10.11, skrev Daniel Vetter: > > On Mon, Feb 04, 2019 at 06:35:28PM +0100, Noralf Trønnes wrote: > >> > >> > >> Den 04.02.2019 16.41, skrev Daniel Vetter: > >>> On Sun, Feb 03, 2019 at 04:41:56PM +0100, Noralf Trønnes wrote: > The only thing now that makes drm_dev_unplug() special is that it sets > drm_device->unplugged. Move this code to drm_dev_unregister() so that we > can remove drm_dev_unplug(). > > Signed-off-by: Noralf Trønnes > --- > >> > >> [...] > >> > drivers/gpu/drm/drm_drv.c | 27 +++ > include/drm/drm_drv.h | 10 -- > 2 files changed, 19 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c > index 05bbc2b622fc..e0941200edc6 100644 > --- a/drivers/gpu/drm/drm_drv.c > +++ b/drivers/gpu/drm/drm_drv.c > @@ -366,15 +366,6 @@ EXPORT_SYMBOL(drm_dev_exit); > */ > void drm_dev_unplug(struct drm_device *dev) > { > -/* > - * After synchronizing any critical read section is guaranteed > to see > - * the new value of ->unplugged, and any critical section which > might > - * still have seen the old value of ->unplugged is guaranteed > to have > - * finished. > - */ > -dev->unplugged = true; > -synchronize_srcu(_unplug_srcu); > - > drm_dev_unregister(dev); > drm_dev_put(dev); > } > @@ -832,11 +823,14 @@ EXPORT_SYMBOL(drm_dev_register); > * drm_dev_register() but does not deallocate the device. The caller > must call > * drm_dev_put() to drop their final reference. > * > - * A special form of unregistering for hotpluggable devices is > drm_dev_unplug(), > - * which can be called while there are still open users of @dev. > + * This function can be called while there are still open users of @dev > as long > + * as the driver protects its device resources using drm_dev_enter() and > + * drm_dev_exit(). > * > * This should be called first in the device teardown code to make sure > - * userspace can't access the device instance any more. 
> + * userspace can't access the device instance any more. Drivers that > support > + * device unplug will probably want to call > drm_atomic_helper_shutdown() first > >>> > >>> Read once more with a bit more coffee, spotted this: > >>> > >>> s/first/afterwards/ - shutting down the hw before we've taken it away from > >>> userspace is kinda the wrong way round. It should be the inverse of driver > >>> load, which is 1) allocate structures 2) prep hw 3) register driver with > >>> the world (simplified ofc). > >>> > >> > >> The problem is that drm_dev_unregister() sets the device as unplugged > >> and if drm_atomic_helper_shutdown() is called afterwards it's not > >> allowed to touch hardware. > >> > >> I know it's the wrong order, but the only way to do it in the right > >> order is to have a separate function that sets unplugged: > >> > >>drm_dev_unregister(); > >>drm_atomic_helper_shutdown(); > >>drm_dev_set_unplugged(); > > > > Annoying ... but yeah calling _shutdown() before we stopped userspace is > > also not going to work. Because userspace could quickly re-enable > > something, and then the refcounts would be all wrong again and leaking > > objects. > > > > What happens with a USB device that is unplugged with open userspace, > will that leak objects? Maybe we've jumped to conclusions. drm_atomic_helper_shutdown() will run as normal, the only thing that should be skipped is actually touching the hw (as long as the driver doesn't protect too much with drm_dev_enter/exit). So all the software updates (including refcounting updates) will still be done. Ofc current udl is not yet atomic, so in reality something else happens. And we ofc still have the same issue that if you just unload the driver, then the hw will stay on (which might really confuse the driver on next load, when it assumes that it only gets loaded from cold boot where everything is off - which usually is the case on an arm soc at least). 
> > I get a bit the feeling we're over-optimizing here with trying to devm-ize > > drm_dev_register. Just getting drm_device correctly devm-ized is a big > > step forward already, and will open up a lot of TODO items across a lot of > > drivers. E.g. we could add a drm_dev_kzalloc, for allocating all the drm_* > > structs, which gets released together with drm_device. I think that's a > > much clearer path forward, I think we all agree that getting the kfree out > > of the driver codes is a good thing, and it would allow us to do this > > correctly. > > > > Then once we have that and rolled out to a few drivers we can reconsider >
Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
Ah! The initial clear of the PDs is syncing to everything! Ok, that is easy to fix. Christian. Am 05.02.19 um 16:49 schrieb Kuehling, Felix: > I saw a backtrace from the GPU scheduler when I was debugging this yesterday, > but those backtraces never tell us where the command submission came from. It > could be memory initialization, or some migration at buffer-validation. > Basically, any command submission triggered by page table allocation that > synchronizes with the PD reservation object is a problem. > > Regards, >Felix > > -Original Message- > From: Christian König > Sent: Tuesday, February 05, 2019 10:40 AM > To: Kuehling, Felix ; Koenig, Christian > ; amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand > > Am 05.02.19 um 16:20 schrieb Kuehling, Felix: This may cause regressions in KFD if PT BO allocation can trigger eviction fences. >>> I don't think that can happen, but need to double check as well. >>> >>> Otherwise allocating page tables could try to evict other page tables from >>> the same process and that seriously doesn't make much sense. >> Eviction fences don't cause actual memory evictions. They are the result of >> memory evictions. They cause KFD processes to be preempted in order to allow >> the fence to signal so memory can be evicted. The problem is that a page >> table allocation waits for the fences on the PD reservation, which looks >> like an eviction to KFD and triggers an unnecessary preemption. There is no >> actual memory eviction happening here. > Yeah, but where is that actually coming from? > > It sounds like we still unnecessary synchronize page table updates somewhere. > > Do you have a call chain? > > Regards, > Christian. 
> >> Regards, >> Felix >> >> -Original Message- >> From: Christian König >> Sent: Tuesday, February 05, 2019 6:37 AM >> To: Kuehling, Felix ; >> amd-gfx@lists.freedesktop.org >> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand >> >>> This may cause regressions in KFD if PT BO allocation can trigger >>> eviction fences. >> I don't think that can happen, but need to double check as well. >> >> Otherwise allocating page tables could try to evict other page tables from >> the same process and that seriously doesn't make much sense. >> >>> Do you know whether PT BO allocation would wait on the page-directory >>> reservation object without the sync-object API anywhere? >> For inside the kernel I can't think of any relevant use case, except for the >> eviction call path and there it is intentional. >> >> One thing which will always wait for all fences is the display code path, >> but that is not relevant here. (Isn't it?). >> >> What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this >> is also completely intentional that we wait on everything here. >> >> Regards, >> Christian. >> >> Am 04.02.19 um 21:17 schrieb Kuehling, Felix: >>> This may cause regressions in KFD if PT BO allocation can trigger >>> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a >>> context where we had temporarily removed the eviction fence. PT BO >>> allocation in amdgpu_vm_bo_update is not protected like that. >>> >>> I vaguely remember looking into this last time you were working on >>> our eviction fence code and thinking that most of the cases where we >>> remove the eviction fence were no longer needed if fence >>> synchronization happens through the amdgpu_sync-object API (rather >>> than waiting on a reservation object directly). I think it's time I >>> go and finally clean that up. >>> >>> Do you know whether PT BO allocation would wait on the page-directory >>> reservation object without the sync-object API anywhere? 
>>> >>> Regards, >>> Felix >>> >>> On 2019-02-04 7:42 a.m., Christian König wrote: Let's start to allocate VM PDs/PTs on demand instead of pre-allocating them during mapping. Signed-off-by: Christian König --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 - drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 9 -- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 -- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 3 - 5 files changed, 38 insertions(+), 126 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d7b10d79f1de..2176c92f9377 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, vm->process_info->eviction_fence, NULL, NULL); - ret =
RE: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
I saw a backtrace from the GPU scheduler when I was debugging this yesterday, but those backtraces never tell us where the command submission came from. It could be memory initialization, or some migration at buffer-validation. Basically, any command submission triggered by page table allocation that synchronizes with the PD reservation object is a problem. Regards, Felix -Original Message- From: Christian König Sent: Tuesday, February 05, 2019 10:40 AM To: Kuehling, Felix ; Koenig, Christian ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand Am 05.02.19 um 16:20 schrieb Kuehling, Felix: >>> This may cause regressions in KFD if PT BO allocation can trigger >>> eviction fences. >> I don't think that can happen, but need to double check as well. >> >> Otherwise allocating page tables could try to evict other page tables from >> the same process and that seriously doesn't make much sense. > Eviction fences don't cause actual memory evictions. They are the result of > memory evictions. They cause KFD processes to be preempted in order to allow > the fence to signal so memory can be evicted. The problem is that a page > table allocation waits for the fences on the PD reservation, which looks like > an eviction to KFD and triggers an unnecessary preemption. There is no actual > memory eviction happening here. Yeah, but where is that actually coming from? It sounds like we still unnecessarily synchronize page table updates somewhere. Do you have a call chain? Regards, Christian. > > Regards, >Felix > > -Original Message- > From: Christian König > Sent: Tuesday, February 05, 2019 6:37 AM > To: Kuehling, Felix ; > amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand > >> This may cause regressions in KFD if PT BO allocation can trigger >> eviction fences. > I don't think that can happen, but need to double check as well. 
> > Otherwise allocating page tables could try to evict other page tables from > the same process and that seriously doesn't make much sense. > >> Do you know whether PT BO allocation would wait on the page-directory >> reservation object without the sync-object API anywhere? > For inside the kernel I can't think of any relevant use case, except for the > eviction call path and there it is intentional. > > One thing which will always wait for all fences is the display code path, but > that is not relevant here. (Isn't it?). > > What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this > is also completely intentional that we wait on everything here. > > Regards, > Christian. > > Am 04.02.19 um 21:17 schrieb Kuehling, Felix: >> This may cause regressions in KFD if PT BO allocation can trigger >> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a >> context where we had temporarily removed the eviction fence. PT BO >> allocation in amdgpu_vm_bo_update is not protected like that. >> >> I vaguely remember looking into this last time you were working on >> our eviction fence code and thinking that most of the cases where we >> remove the eviction fence were no longer needed if fence >> synchronization happens through the amdgpu_sync-object API (rather >> than waiting on a reservation object directly). I think it's time I >> go and finally clean that up. >> >> Do you know whether PT BO allocation would wait on the page-directory >> reservation object without the sync-object API anywhere? >> >> Regards, >> Felix >> >> On 2019-02-04 7:42 a.m., Christian König wrote: >>> Let's start to allocate VM PDs/PTs on demand instead of >>> pre-allocating them during mapping. 
>>> >>> Signed-off-by: Christian König >>> --- >>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 - >>> drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 9 -- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 -- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 3 - >>> 5 files changed, 38 insertions(+), 126 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >>> index d7b10d79f1de..2176c92f9377 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >>> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, >>> struct kgd_mem *mem, >>> >>> vm->process_info->eviction_fence, >>> NULL, NULL); >>> >>> - ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); >>> - if (ret) { >>> - pr_err("Failed to allocate pts, err=%d\n", ret); >>> - goto err_alloc_pts; >>> - } >>> - >>> ret = vm_validate_pt_pd_bos(vm); >>> if (ret) { >>>
Re: kernel BUG at drivers/gpu/drm//ttm/ttm_bo.c:196!
Possible, but unlikely. That sounds more like a reference counting problem where something is killing the BO while it is still on the LRU. Need to double check how that can happen, Christian. Am 05.02.19 um 16:02 schrieb Michel Dänzer: Hit this with an amdgpu piglit run today, see the attached dmesg excerpt. It's ttm_bo_ref_bug() being called from ttm_bo_del_from_lru(). Maybe this is still fallout from the "cleanup setting bulk_movable" change? ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
Am 05.02.19 um 16:20 schrieb Kuehling, Felix: This may cause regressions in KFD if PT BO allocation can trigger eviction fences. I don't think that can happen, but need to double check as well. Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense. Eviction fences don't cause actual memory evictions. They are the result of memory evictions. They cause KFD processes to be preempted in order to allow the fence to signal so memory can be evicted. The problem is that a page table allocation waits for the fences on the PD reservation, which looks like an eviction to KFD and triggers an unnecessary preemption. There is no actual memory eviction happening here. Yeah, but where is that actually coming from? It sounds like we still unnecessarily synchronize page table updates somewhere. Do you have a call chain? Regards, Christian. Regards, Felix -Original Message- From: Christian König Sent: Tuesday, February 05, 2019 6:37 AM To: Kuehling, Felix ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand This may cause regressions in KFD if PT BO allocation can trigger eviction fences. I don't think that can happen, but need to double check as well. Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense. Do you know whether PT BO allocation would wait on the page-directory reservation object without the sync-object API anywhere? For inside the kernel I can't think of any relevant use case, except for the eviction call path and there it is intentional. One thing which will always wait for all fences is the display code path, but that is not relevant here. (Isn't it?). What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this is also completely intentional that we wait on everything here. Regards, Christian. 
Am 04.02.19 um 21:17 schrieb Kuehling, Felix: This may cause regressions in KFD if PT BO allocation can trigger eviction fences. The previous call to amdgpu_vm_alloc_pts was in a context where we had temporarily removed the eviction fence. PT BO allocation in amdgpu_vm_bo_update is not protected like that. I vaguely remember looking into this last time you were working on our eviction fence code and thinking that most of the cases where we remove the eviction fence were no longer needed if fence synchronization happens through the amdgpu_sync-object API (rather than waiting on a reservation object directly). I think it's time I go and finally clean that up. Do you know whether PT BO allocation would wait on the page-directory reservation object without the sync-object API anywhere? Regards, Felix On 2019-02-04 7:42 a.m., Christian König wrote: Let's start to allocate VM PDs/PTs on demand instead of pre-allocating them during mapping. Signed-off-by: Christian König --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 - drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 9 -- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 -- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 3 - 5 files changed, 38 insertions(+), 126 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d7b10d79f1de..2176c92f9377 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, vm->process_info->eviction_fence, NULL, NULL); - ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); - if (ret) { - pr_err("Failed to allocate pts, err=%d\n", ret); - goto err_alloc_pts; - } - ret = vm_validate_pt_pd_bos(vm); if (ret) { pr_err("validate_pt_pd_bos() failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 7e22be7ca68a..54dd02a898b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, - size); - if (r) { - DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, *bo_va); - ttm_eu_backoff_reservation(, ); - return r; - } - r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
RE: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
> > This may cause regressions in KFD if PT BO allocation can trigger > > eviction fences. > I don't think that can happen, but need to double check as well. > > Otherwise allocating page tables could try to evict other page tables from > the same process and that seriously doesn't make much sense. Eviction fences don't cause actual memory evictions. They are the result of memory evictions. They cause KFD processes to be preempted in order to allow the fence to signal so memory can be evicted. The problem is that a page table allocation waits for the fences on the PD reservation, which looks like an eviction to KFD and triggers an unnecessary preemption. There is no actual memory eviction happening here. Regards, Felix -Original Message- From: Christian König Sent: Tuesday, February 05, 2019 6:37 AM To: Kuehling, Felix ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand > This may cause regressions in KFD if PT BO allocation can trigger > eviction fences. I don't think that can happen, but need to double check as well. Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense. > Do you know whether PT BO allocation would wait on the page-directory > reservation object without the sync-object API anywhere? For inside the kernel I can't think of any relevant use case, except for the eviction call path and there it is intentional. One thing which will always wait for all fences is the display code path, but that is not relevant here. (Isn't it?). What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this is also completely intentional that we wait on everything here. Regards, Christian. Am 04.02.19 um 21:17 schrieb Kuehling, Felix: > This may cause regressions in KFD if PT BO allocation can trigger > eviction fences. The previous call to amdgpu_vm_alloc_pts was in a > context where we had temporarily removed the eviction fence. 
PT BO > allocation in amdgpu_vm_bo_update is not protected like that. > > I vaguely remember looking into this last time you were working on our > eviction fence code and thinking that most of the cases where we > remove the eviction fence were no longer needed if fence > synchronization happens through the amdgpu_sync-object API (rather > than waiting on a reservation object directly). I think it's time I go > and finally clean that up. > > Do you know whether PT BO allocation would wait on the page-directory > reservation object without the sync-object API anywhere? > > Regards, > Felix > > On 2019-02-04 7:42 a.m., Christian König wrote: >> Let's start to allocate VM PDs/PTs on demand instead of >> pre-allocating them during mapping. >> >> Signed-off-by: Christian König >> --- >>.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 - >>drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 9 -- >>drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 -- >>drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +- >>drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 3 - >>5 files changed, 38 insertions(+), 126 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> index d7b10d79f1de..2176c92f9377 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, >> struct kgd_mem *mem, >> vm->process_info->eviction_fence, >> NULL, NULL); >> >> -ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); >> -if (ret) { >> -pr_err("Failed to allocate pts, err=%d\n", ret); >> -goto err_alloc_pts; >> -} >> - >> ret = vm_validate_pt_pd_bos(vm); >> if (ret) { >> pr_err("validate_pt_pd_bos() failed\n"); diff --git >> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c >> index 7e22be7ca68a..54dd02a898b9 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c >> +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c >> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, >> struct amdgpu_vm *vm, >> return -ENOMEM; >> } >> >> -r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, >> -size); >> -if (r) { >> -DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); >> -amdgpu_vm_bo_rmv(adev, *bo_va); >> -ttm_eu_backoff_reservation(, ); >> -return r; >> -} >> - >> r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, >> AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | >> AMDGPU_PTE_EXECUTABLE); >> diff --git
kernel BUG at drivers/gpu/drm//ttm/ttm_bo.c:196!
Hit this with an amdgpu piglit run today, see the attached dmesg excerpt. It's ttm_bo_ref_bug() being called from ttm_bo_del_from_lru(). Maybe this is still fallout from the "cleanup setting bulk_movable" change? -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer Feb 5 15:20:59 kaveri kernel: [ 2006.806877] kernel BUG at drivers/gpu/drm//ttm/ttm_bo.c:196! Feb 5 15:20:59 kaveri kernel: [ 2006.806909] invalid opcode: [#1] SMP KASAN NOPTI Feb 5 15:20:59 kaveri kernel: [ 2006.806922] CPU: 14 PID: 4058 Comm: shader_runner Tainted: G OE 5.0.0-rc1-00261-gbbf48cae572b #119 Feb 5 15:20:59 kaveri kernel: [ 2006.806929] Hardware name: Micro-Star International Co., Ltd. MS-7A34/B350 TOMAHAWK (MS-7A34), BIOS 1.80 09/13/2017 Feb 5 15:20:59 kaveri kernel: [ 2006.806941] RIP: 0010:ttm_bo_ref_bug.constprop.20+0x5/0x10 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.806945] Code: d3 8b 54 24 04 8b 34 24 e9 74 ff ff ff 48 89 ef e8 60 37 cf d3 eb bd 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 0f 1f 44 00 00 <0f> 0b 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 55 49 89 fd 41 Feb 5 15:20:59 kaveri kernel: [ 2006.806948] RSP: :88819449f670 EFLAGS: 00010202 Feb 5 15:20:59 kaveri kernel: [ 2006.806952] RAX: 0001 RBX: 88810d4e5dd0 RCX: Feb 5 15:20:59 kaveri kernel: [ 2006.806954] RDX: dc00 RSI: 0004 RDI: 88819449f608 Feb 5 15:20:59 kaveri kernel: [ 2006.806957] RBP: 88810d4e5e78 R08: ed1032893ec2 R09: ed1032893ec1 Feb 5 15:20:59 kaveri kernel: [ 2006.806959] R10: ed1032893ec1 R11: 0003 R12: 888328562bf0 Feb 5 15:20:59 kaveri kernel: [ 2006.806967] R13: 88810d4e5dfc R14: 88810d4e5e80 R15: 8881427eef78 Feb 5 15:20:59 kaveri kernel: [ 2006.806976] FS: 7f8d74113000() GS:88837e18() knlGS: Feb 5 15:20:59 kaveri kernel: [ 2006.806987] CS: 0010 DS: ES: CR0: 80050033 Feb 5 15:20:59 kaveri kernel: [ 2006.806995] CR2: 7f8d6c160210 CR3: 00019f0f4000 CR4: 003406e0 Feb 5 15:20:59 kaveri kernel: [ 2006.806998] Call Trace: Feb 5 15:20:59 kaveri kernel: [ 2006.807009] 
ttm_bo_del_from_lru+0x2f9/0x430 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807022] ttm_bo_swapout+0x1fc/0x680 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807035] ? ttm_bo_unmap_virtual+0x90/0x90 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807046] ? .slowpath+0xe/0xe Feb 5 15:20:59 kaveri kernel: [ 2006.807051] ? native_queued_spin_lock_slowpath+0x5b1/0x8b0 Feb 5 15:20:59 kaveri kernel: [ 2006.807062] ? find_held_lock+0x33/0x1c0 Feb 5 15:20:59 kaveri kernel: [ 2006.807070] ? ttm_shrink+0x16c/0x210 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807078] ? lock_downgrade+0x5d0/0x5d0 Feb 5 15:20:59 kaveri kernel: [ 2006.807088] ? security_capable+0x54/0x90 Feb 5 15:20:59 kaveri kernel: [ 2006.807100] ttm_shrink+0x18a/0x210 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807116] ttm_mem_global_alloc_zone+0x1b7/0x2f0 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807130] ttm_pool_populate+0x52c/0xae0 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807144] ? ttm_pool_unpopulate+0x40/0x40 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807147] ? lock_downgrade+0x5d0/0x5d0 Feb 5 15:20:59 kaveri kernel: [ 2006.807153] ? mutex_lock_interruptible_nested+0x20/0x20 Feb 5 15:20:59 kaveri kernel: [ 2006.807165] ttm_populate_and_map_pages+0x2a/0x7d0 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807251] ? amdgpu_bo_move_notify+0x310/0x310 [amdgpu] Feb 5 15:20:59 kaveri kernel: [ 2006.807261] ttm_tt_populate.part.8+0x8b/0x2c0 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807270] ? ttm_mem_io_reserve_vm+0xbe/0x2f0 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807276] ? mutex_trylock+0x167/0x1a0 Feb 5 15:20:59 kaveri kernel: [ 2006.807286] ttm_bo_vm_fault+0x777/0x1290 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807300] ? ttm_bo_vm_access+0x680/0x680 [ttm] Feb 5 15:20:59 kaveri kernel: [ 2006.807372] ? amdgpu_drm_ioctl+0xf7/0x1b0 [amdgpu] Feb 5 15:20:59 kaveri kernel: [ 2006.807384] ? 
mark_held_locks+0x140/0x140 Feb 5 15:20:59 kaveri kernel: [ 2006.807394] __do_fault+0x80/0x320 Feb 5 15:20:59 kaveri kernel: [ 2006.807402] __handle_mm_fault+0x2011/0x39f0 Feb 5 15:20:59 kaveri kernel: [ 2006.807409] ? trace_hardirqs_on_thunk+0x1a/0x1c Feb 5 15:20:59 kaveri kernel: [ 2006.807415] ? vmf_insert_mixed_mkwrite+0x10/0x10 Feb 5 15:20:59 kaveri kernel: [ 2006.807422] ? find_held_lock+0x33/0x1c0 Feb 5 15:20:59 kaveri kernel: [ 2006.807432] ? mark_held_locks+0xc1/0x140 Feb 5 15:20:59 kaveri kernel: [ 2006.807438] ? handle_mm_fault+0x4e7/0x750 Feb 5 15:20:59 kaveri kernel: [ 2006.807446] handle_mm_fault+0x23f/0x750 Feb 5 15:20:59 kaveri kernel: [ 2006.807455] __do_page_fault+0x402/0x9e0 Feb 5 15:20:59 kaveri kernel: [ 2006.807463] ? page_fault+0x8/0x30 Feb 5 15:20:59 kaveri
Re: [PATCH] drm/amdgpu/display: fix compiler errors [-Werror,-Wparentheses-equality]
On 2019-02-04 3:16 a.m., Vishwakarma, Pratik wrote: > Remove extraneous parentheses around the comparison > to silence this warning > > Signed-off-by: Pratik Vishwakarma Reviewed-by: Harry Wentland Harry > --- > drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c > b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c > index 7d102ac0d61b..1ef0074302c5 100644 > --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c > +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c > @@ -63,7 +63,7 @@ void scaler_settings_calculation(struct > dcn_bw_internal_vars *v) > if (v->interlace_output[k] == 1.0) { > v->v_ratio[k] = 2.0 * v->v_ratio[k]; > } > - if ((v->underscan_output[k] == 1.0)) { > + if (v->underscan_output[k] == 1.0) { > v->h_ratio[k] = v->h_ratio[k] * v->under_scan_factor; > v->v_ratio[k] = v->v_ratio[k] * v->under_scan_factor; > } > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6
Am 04.02.19 um 19:23 schrieb Yang, Philip: Use HMM helper function hmm_vma_fault() to get physical pages backing userptr and start CPU page table update track of those pages. Then use hmm_vma_range_done() to check if those pages are updated before amdgpu_cs_submit for gfx or before user queues are resumed for kfd. If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart from scratch, for kfd, restore worker is rescheduled to retry. HMM simplify the CPU page table concurrent update check, so remove guptasklock, mmu_invalidations, last_set_pages fields from amdgpu_ttm_tt struct. HMM does not pin the page (increase page ref count), so remove related operations like release_pages(), put_page(), mark_page_dirty(). Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 158 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 25 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h| 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 173 -- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +- 9 files changed, 198 insertions(+), 277 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 0b31a1859023..0e1711a75b68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -61,7 +61,6 @@ struct kgd_mem { atomic_t invalid; struct amdkfd_process_info *process_info; - struct page **user_pages; struct amdgpu_sync sync; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d7b10d79f1de..ae2d838d31ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -582,28 +582,12 @@ static int 
init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - /* If no restore worker is running concurrently, user_pages -* should not be allocated -*/ - WARN(mem->user_pages, "Leaking user_pages array"); - - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", __func__); - ret = -ENOMEM; - goto unregister_out; - } - - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); - goto free_out; + goto unregister_out; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); - ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - if (ret) - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: - kvfree(mem->user_pages); - mem->user_pages = NULL; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = >tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(>kfd_bo.tv.head, >list); amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]); @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = >tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(>kfd_bo.tv.head, >list); i = 0; @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(_list_entry->head); mutex_unlock(_info->lock); - /* Free user pages if necessary */ - if (mem->user_pages) { - pr_debug("%s: Freeing user_pages array\n", __func__); - if (mem->user_pages[0]) 
- release_pages(mem->user_pages, - mem->bo->tbo.ttm->num_pages); - kvfree(mem->user_pages); - } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, ); if (unlikely(ret)) return ret; @@ -1855,25 +1824,11 @@ static int
Re: [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2
Am 04.02.19 um 19:23 schrieb Yang, Philip: There is circular lock between gfx and kfd path with HMM change: lock(dqm) -> bo::reserve -> amdgpu_mn_lock To avoid this, move init/unint_mqd() out of lock(dqm), to remove nested locking between mmap_sem and bo::reserve. The locking order is: bo::reserve -> amdgpu_mn_lock(p->mn) In general this sounds correct to me, but apart from that I don't know the code well enough to fully judge. Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153 Signed-off-by: Philip Yang Acked-by: Christian König --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8372556b52eb..efe0d3c0215b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1156,21 +1156,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, int retval; struct mqd_manager *mqd_mgr; - retval = 0; - - dqm_lock(dqm); - if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; - goto out_unlock; + goto out; } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { retval = allocate_sdma_queue(dqm, >sdma_id); if (retval) - goto out_unlock; + goto out; q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); q->properties.sdma_engine_id = @@ -1181,6 +1177,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_sdma_queue; + /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order: +* lock(dqm) -> bo::reserve +*/ mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1188,6 +1187,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, 
struct queue *q, retval = -ENOMEM; goto out_deallocate_doorbell; } + /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -1196,9 +1196,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.is_evicted = (q->properties.queue_size > 0 && q->properties.queue_percent > 0 && q->properties.queue_address != 0); - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; retval = mqd_mgr->init_mqd(mqd_mgr, >mqd, >mqd_mem_obj, @@ -1206,6 +1204,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_doorbell; + dqm_lock(dqm); + list_add(>list, >queues_list); qpd->queue_count++; if (q->properties.is_active) { @@ -1233,9 +1233,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); -out_unlock: - dqm_unlock(dqm); - +out: return retval; } @@ -1398,8 +1396,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = true; } - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); - /* * Unconditionally decrement this counter, regardless of the queue's * type @@ -1410,6 +1406,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, dqm_unlock(dqm); + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; failed: @@ -1631,7 +1630,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = false; } - /* lastly, free mqd resources */ + dqm_unlock(dqm); + + /* Lastly, free mqd resources. +* Do uninit_mqd() after dqm_unlock to avoid circular locking. 
+*/ list_for_each_entry_safe(q, next, >queues_list, list) { mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1645,7 +1648,6 @@ static int
Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
Am 05.02.19 um 01:33 schrieb Kuehling, Felix: On 2019-02-04 3:17 p.m., Kuehling, Felix wrote: This may cause regressions in KFD if PT BO allocation can trigger eviction fences. The previous call to amdgpu_vm_alloc_pts was in a context where we had temporarily removed the eviction fence. PT BO allocation in amdgpu_vm_bo_update is not protected like that. I vaguely remember looking into this last time you were working on our eviction fence code and thinking that most of the cases where we remove the eviction fence were no longer needed if fence synchronization happens through the amdgpu_sync-object API (rather than waiting on a reservation object directly). I think it's time I go and finally clean that up. I'm looking at this now. It's not working as I was hoping. Do you know whether PT BO allocation would wait on the page-directory reservation object without the sync-object API anywhere? It doesn't even matter. Buffer moves (anything calling amdgpu_sync_resv with AMDGPU_FENCE_OWNER_UNDEFINED) are supposed to trigger eviction fences. So page table BO allocation triggers eviction fences on the PD reservation. I don't know how to avoid this other than by removing the eviction fence while allocating PT BOs. With your changes this will be potentially every time we map or unmap memory. Any better ideas? Let me take a closer look what exactly is happening here. Regards, Christian. Regards, Felix Regards, Felix On 2019-02-04 7:42 a.m., Christian König wrote: Let's start to allocate VM PDs/PTs on demand instead of pre-allocating them during mapping. 
Signed-off-by: Christian König --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 - drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 9 -- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 -- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 3 - 5 files changed, 38 insertions(+), 126 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d7b10d79f1de..2176c92f9377 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, vm->process_info->eviction_fence, NULL, NULL); - ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); - if (ret) { - pr_err("Failed to allocate pts, err=%d\n", ret); - goto err_alloc_pts; - } - ret = vm_validate_pt_pd_bos(vm); if (ret) { pr_err("validate_pt_pd_bos() failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 7e22be7ca68a..54dd02a898b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, - size); - if (r) { - DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, *bo_va); - ttm_eu_backoff_reservation(, ); - return r; - } - r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d21dd2f369da..e141e3b13112 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { 
case AMDGPU_VA_OP_MAP: - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, - args->map_size); - if (r) - goto error_backoff; - va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, @@ -647,11 +642,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, - args->map_size); - if (r) - goto error_backoff; - va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); r =
Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
This may cause regressions in KFD if PT BO allocation can trigger eviction fences. I don't think that can happen, but need to double check as well. Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense. Do you know whether PT BO allocation would wait on the page-directory reservation object without the sync-object API anywhere? For inside the kernel I can't think of any relevant use case, except for the eviction call path and there it is intentional. One thing which will always wait for all fences is the display code path, but that is not relevant here. (Isn't it?). What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this is also completely intentional that we wait on everything here. Regards, Christian. Am 04.02.19 um 21:17 schrieb Kuehling, Felix: This may cause regressions in KFD if PT BO allocation can trigger eviction fences. The previous call to amdgpu_vm_alloc_pts was in a context where we had temporarily removed the eviction fence. PT BO allocation in amdgpu_vm_bo_update is not protected like that. I vaguely remember looking into this last time you were working on our eviction fence code and thinking that most of the cases where we remove the eviction fence were no longer needed if fence synchronization happens through the amdgpu_sync-object API (rather than waiting on a reservation object directly). I think it's time I go and finally clean that up. Do you know whether PT BO allocation would wait on the page-directory reservation object without the sync-object API anywhere? Regards, Felix On 2019-02-04 7:42 a.m., Christian König wrote: Let's start to allocate VM PDs/PTs on demand instead of pre-allocating them during mapping. 
Signed-off-by: Christian König --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 - drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 9 -- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 -- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 3 - 5 files changed, 38 insertions(+), 126 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d7b10d79f1de..2176c92f9377 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, vm->process_info->eviction_fence, NULL, NULL); - ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); - if (ret) { - pr_err("Failed to allocate pts, err=%d\n", ret); - goto err_alloc_pts; - } - ret = vm_validate_pt_pd_bos(vm); if (ret) { pr_err("validate_pt_pd_bos() failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 7e22be7ca68a..54dd02a898b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, - size); - if (r) { - DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, *bo_va); - ttm_eu_backoff_reservation(, ); - return r; - } - r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d21dd2f369da..e141e3b13112 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { 
case AMDGPU_VA_OP_MAP: - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, - args->map_size); - if (r) - goto error_backoff; - va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, @@ -647,11 +642,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, -
Re: [PATCH] drm/amdgpu/vce_v3: start vce block before ring test
Am 05.02.19 um 10:21 schrieb S, Shirish: > On 2/4/2019 9:00 PM, Liu, Leo wrote: >> On 2/4/19 7:49 AM, Koenig, Christian wrote: >>> Am 04.02.19 um 13:44 schrieb S, Shirish: vce ring test fails during resume since mmVCE_RB_RPTR* is not intitalized/updated. Hence start vce block before ring test. >>> Mhm, I wonder why this ever worked. But yeah, same problem seems to >>> exits for VCE 2 as well. >>> >>> Leo any comment on this? >> UVD and VCE start function were at hw_init originally from the bring up >> on all the HW. And later the DPM developer moved them to >> set_powergating_state() for some reason. >> >> @Shirish, are you sure the vce_v3_0_start() is not there? >> >> Just simply adding it back to hw_init, might break the DPM logic, so >> please make sure. > Sure Leo, i will check and get back. I've just double checked the code and at least of hand this patch looks incorrect to me. Submitting commands to the ring should automatically calls amdgpu_vce_ring_begin_use() and so ungate the power and clocks and start the engine. This is actually vital for the ring test, so this patch is a clear NAK. Christian. > > Regards, > > Shirish S > >> Thanks, >> >> Leo >> >> >>> Thanks, >>> Christian. >>> Signed-off-by: Shirish S --- * vce_v4_0.c's hw_init sequence already has this change. drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6ec65cf1..d809c10 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -469,6 +469,10 @@ static int vce_v3_0_hw_init(void *handle) int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + r = vce_v3_0_start(adev); + if (r) + return r; + vce_v3_0_override_vce_clock_gating(adev, true); amdgpu_asic_set_vce_clocks(adev, 1, 1); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [Intel-gfx] [PATCH 2/6] drm/drv: Prepare to remove drm_dev_unplug()
Den 05.02.2019 10.11, skrev Daniel Vetter: > On Mon, Feb 04, 2019 at 06:35:28PM +0100, Noralf Trønnes wrote: >> >> >> Den 04.02.2019 16.41, skrev Daniel Vetter: >>> On Sun, Feb 03, 2019 at 04:41:56PM +0100, Noralf Trønnes wrote: The only thing now that makes drm_dev_unplug() special is that it sets drm_device->unplugged. Move this code to drm_dev_unregister() so that we can remove drm_dev_unplug(). Signed-off-by: Noralf Trønnes --- >> >> [...] >> drivers/gpu/drm/drm_drv.c | 27 +++ include/drm/drm_drv.h | 10 -- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 05bbc2b622fc..e0941200edc6 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -366,15 +366,6 @@ EXPORT_SYMBOL(drm_dev_exit); */ void drm_dev_unplug(struct drm_device *dev) { - /* - * After synchronizing any critical read section is guaranteed to see - * the new value of ->unplugged, and any critical section which might - * still have seen the old value of ->unplugged is guaranteed to have - * finished. - */ - dev->unplugged = true; - synchronize_srcu(_unplug_srcu); - drm_dev_unregister(dev); drm_dev_put(dev); } @@ -832,11 +823,14 @@ EXPORT_SYMBOL(drm_dev_register); * drm_dev_register() but does not deallocate the device. The caller must call * drm_dev_put() to drop their final reference. * - * A special form of unregistering for hotpluggable devices is drm_dev_unplug(), - * which can be called while there are still open users of @dev. + * This function can be called while there are still open users of @dev as long + * as the driver protects its device resources using drm_dev_enter() and + * drm_dev_exit(). * * This should be called first in the device teardown code to make sure - * userspace can't access the device instance any more. + * userspace can't access the device instance any more. 
Drivers that support + * device unplug will probably want to call drm_atomic_helper_shutdown() first >>> >>> Read once more with a bit more coffee, spotted this: >>> >>> s/first/afterwards/ - shutting down the hw before we've taken it away from >>> userspace is kinda the wrong way round. It should be the inverse of driver >>> load, which is 1) allocate structures 2) prep hw 3) register driver with >>> the world (simplified ofc). >>> >> >> The problem is that drm_dev_unregister() sets the device as unplugged >> and if drm_atomic_helper_shutdown() is called afterwards it's not >> allowed to touch hardware. >> >> I know it's the wrong order, but the only way to do it in the right >> order is to have a separate function that sets unplugged: >> >> drm_dev_unregister(); >> drm_atomic_helper_shutdown(); >> drm_dev_set_unplugged(); > > Annoying ... but yeah calling _shutdown() before we stopped userspace is > also not going to work. Because userspace could quickly re-enable > something, and then the refcounts would be all wrong again and leaking > objects. > What happens with a USB device that is unplugged with open userspace, will that leak objects? > I get a bit the feeling we're over-optimizing here with trying to devm-ize > drm_dev_register. Just getting drm_device correctly devm-ized is a big > step forward already, and will open up a lot of TODO items across a lot of > drivers. E.g. we could add a drm_dev_kzalloc, for allocating all the drm_* > structs, which gets released together with drm_device. I think that's a > much clearer path forward, I think we all agree that getting the kfree out > of the driver codes is a good thing, and it would allow us to do this > correctly. > > Then once we have that and rolled out to a few drivers we can reconsider > the entire unregister/shutdown gordian knot here. Atm I just have no idea > how to do this properly :-/ > > Thoughts, other ideas? 
> Yeah, I've come to the conclusion that devm_drm_dev_register() doesn't make much sense if we need a driver remove callback anyways. I think devm_drm_dev_init() makes sense because it yields a cleaner probe() function. An additional benefit is that it requires a drm_driver->release function which is a step in the right direction to get the drm_device lifetime right. Do we agree that a drm_dev_set_unplugged() function is necessary to get the remove/disconnect order right? What about drm_dev_unplug() maybe I should just leave it be? - amd uses drm_driver->unload, so that one takes some work to get right to support unplug. It doesn't check the unplugged state, so really doesn't need drm_dev_unplug() I guess. Do they have cards that can be hotplugged? - udl uses drm_driver->unload, doesn't use drm_atomic_helper_shutdown(). It has only one
Re: [PATCH] drm/amdgpu/vce_v3: start vce block before ring test
On 2/4/2019 9:00 PM, Liu, Leo wrote: > On 2/4/19 7:49 AM, Koenig, Christian wrote: >> Am 04.02.19 um 13:44 schrieb S, Shirish: >>> vce ring test fails during resume since mmVCE_RB_RPTR* >>> is not intitalized/updated. >>> >>> Hence start vce block before ring test. >> Mhm, I wonder why this ever worked. But yeah, same problem seems to >> exits for VCE 2 as well. >> >> Leo any comment on this? > UVD and VCE start function were at hw_init originally from the bring up > on all the HW. And later the DPM developer moved them to > set_powergating_state() for some reason. > > @Shirish, are you sure the vce_v3_0_start() is not there? > > Just simply adding it back to hw_init, might break the DPM logic, so > please make sure. Sure Leo, i will check and get back. Regards, Shirish S > > Thanks, > > Leo > > >> Thanks, >> Christian. >> >>> Signed-off-by: Shirish S >>> --- >>> * vce_v4_0.c's hw_init sequence already has this change. >>> >>> drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 4 >>> 1 file changed, 4 insertions(+) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c >>> b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c >>> index 6ec65cf1..d809c10 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c >>> @@ -469,6 +469,10 @@ static int vce_v3_0_hw_init(void *handle) >>> int r, i; >>> struct amdgpu_device *adev = (struct amdgpu_device *)handle; >>> >>> + r = vce_v3_0_start(adev); >>> + if (r) >>> + return r; >>> + >>> vce_v3_0_override_vce_clock_gating(adev, true); >>> >>> amdgpu_asic_set_vce_clocks(adev, 1, 1); -- Regards, Shirish S ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [Intel-gfx] [PATCH 2/6] drm/drv: Prepare to remove drm_dev_unplug()
On Mon, Feb 04, 2019 at 06:35:28PM +0100, Noralf Trønnes wrote: > > > Den 04.02.2019 16.41, skrev Daniel Vetter: > > On Sun, Feb 03, 2019 at 04:41:56PM +0100, Noralf Trønnes wrote: > >> The only thing now that makes drm_dev_unplug() special is that it sets > >> drm_device->unplugged. Move this code to drm_dev_unregister() so that we > >> can remove drm_dev_unplug(). > >> > >> Signed-off-by: Noralf Trønnes > >> --- > > [...] > > >> drivers/gpu/drm/drm_drv.c | 27 +++ > >> include/drm/drm_drv.h | 10 -- > >> 2 files changed, 19 insertions(+), 18 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c > >> index 05bbc2b622fc..e0941200edc6 100644 > >> --- a/drivers/gpu/drm/drm_drv.c > >> +++ b/drivers/gpu/drm/drm_drv.c > >> @@ -366,15 +366,6 @@ EXPORT_SYMBOL(drm_dev_exit); > >> */ > >> void drm_dev_unplug(struct drm_device *dev) > >> { > >> - /* > >> - * After synchronizing any critical read section is guaranteed to see > >> - * the new value of ->unplugged, and any critical section which might > >> - * still have seen the old value of ->unplugged is guaranteed to have > >> - * finished. > >> - */ > >> - dev->unplugged = true; > >> - synchronize_srcu(_unplug_srcu); > >> - > >>drm_dev_unregister(dev); > >>drm_dev_put(dev); > >> } > >> @@ -832,11 +823,14 @@ EXPORT_SYMBOL(drm_dev_register); > >> * drm_dev_register() but does not deallocate the device. The caller must > >> call > >> * drm_dev_put() to drop their final reference. > >> * > >> - * A special form of unregistering for hotpluggable devices is > >> drm_dev_unplug(), > >> - * which can be called while there are still open users of @dev. > >> + * This function can be called while there are still open users of @dev > >> as long > >> + * as the driver protects its device resources using drm_dev_enter() and > >> + * drm_dev_exit(). > >> * > >> * This should be called first in the device teardown code to make sure > >> - * userspace can't access the device instance any more. 
> >> + * userspace can't access the device instance any more. Drivers that > >> support > >> + * device unplug will probably want to call drm_atomic_helper_shutdown() > >> first > > > > Read once more with a bit more coffee, spotted this: > > > > s/first/afterwards/ - shutting down the hw before we've taken it away from > > userspace is kinda the wrong way round. It should be the inverse of driver > > load, which is 1) allocate structures 2) prep hw 3) register driver with > > the world (simplified ofc). > > > > The problem is that drm_dev_unregister() sets the device as unplugged > and if drm_atomic_helper_shutdown() is called afterwards it's not > allowed to touch hardware. > > I know it's the wrong order, but the only way to do it in the right > order is to have a separate function that sets unplugged: > > drm_dev_unregister(); > drm_atomic_helper_shutdown(); > drm_dev_set_unplugged(); Annoying ... but yeah calling _shutdown() before we stopped userspace is also not going to work. Because userspace could quickly re-enable something, and then the refcounts would be all wrong again and leaking objects. I get a bit the feeling we're over-optimizing here with trying to devm-ize drm_dev_register. Just getting drm_device correctly devm-ized is a big step forward already, and will open up a lot of TODO items across a lot of drivers. E.g. we could add a drm_dev_kzalloc, for allocating all the drm_* structs, which gets released together with drm_device. I think that's a much clearer path forward, I think we all agree that getting the kfree out of the driver codes is a good thing, and it would allow us to do this correctly. Then once we have that and rolled out to a few drivers we can reconsider the entire unregister/shutdown gordian knot here. Atm I just have no idea how to do this properly :-/ Thoughts, other ideas? Cheers, Daniel > Noralf. > > >> + * in order to disable the hardware on regular driver module unload. 
> >> */ > >> void drm_dev_unregister(struct drm_device *dev) > >> { > >> @@ -845,6 +839,15 @@ void drm_dev_unregister(struct drm_device *dev) > >>if (drm_core_check_feature(dev, DRIVER_LEGACY)) > >>drm_lastclose(dev); > >> > >> + /* > >> + * After synchronizing any critical read section is guaranteed to see > >> + * the new value of ->unplugged, and any critical section which might > >> + * still have seen the old value of ->unplugged is guaranteed to have > >> + * finished. > >> + */ > >> + dev->unplugged = true; > >> + synchronize_srcu(_unplug_srcu); > >> + > >>dev->registered = false; > >> > >>drm_client_dev_unregister(dev); > >> diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h > >> index ca46a45a9cce..c50696c82a42 100644 > >> --- a/include/drm/drm_drv.h > >> +++ b/include/drm/drm_drv.h > >> @@ -736,13 +736,11 @@ void drm_dev_unplug(struct drm_device *dev); > >> *
[bug report] drm/amd/display: Call into DC once per multiplane flip
Hello David Francis, This is a semi-automatic email about new static checker warnings. The patch 8a48b44cd00f: "drm/amd/display: Call into DC once per multiplane flip" from Dec 11, 2018, leads to the following Smatch complaint: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:4763 amdgpu_dm_commit_planes() warn: variable dereferenced before check 'acrtc_state->stream' (see line 4748) drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c 4747 4748 stream_status = dc_stream_get_status(acrtc_state->stream); ^^^ New unchecked dereference inside function. 4749 if (!stream_status) { 4750 DRM_ERROR("No stream status for CRTC: id=%d\n", 4751 acrtc_attach->crtc_id); 4752 continue; 4753 } 4754 4755 surface = stream_status->plane_states[0]; 4756 flip->surface_updates[flip_count].surface = surface; 4757 if (!flip->surface_updates[flip_count].surface) { 4758 DRM_ERROR("No surface for CRTC: id=%d\n", 4759 acrtc_attach->crtc_id); 4760 continue; 4761 } 4762 4763 if (acrtc_state->stream) ^^^ New check is too late. 4764 update_freesync_state_on_stream( 4765 dm, regards, dan carpenter ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx