Re: [PATCH 1/5] drm/amdkfd: Improve doorbell variable names

2019-02-05 Thread Alex Deucher
On Tue, Feb 5, 2019 at 4:43 PM Kuehling, Felix  wrote:
>
> The headline should start with "drm/amdgpu". This change is not KFD
> specific.
>
> I think Alex should review this change.

If we are going to change it, let's be consistent for all IPs (e.g.,
gfx, vcn, etc.).

Alex

>
> Regards,
>Felix
>
> On 2019-02-05 3:31 p.m., Zhao, Yong wrote:
> > Indicate that the doorbell offset and range are in dwords.
> >
> > Change-Id: Ib0f2564ffa7b1940ffb8725cdc03f662184f5436
> > Signed-off-by: Yong Zhao 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 3 ++-
> >   drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c   | 6 +++---
> >   drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c   | 6 +++---
> >   drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 6 +++---
> >   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 3 ++-
> >   drivers/gpu/drm/amd/amdgpu/soc15.c   | 2 +-
> >   drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 6 +-
> >   drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 6 +-
> >   9 files changed, 25 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index d67f8b1dfe80..6230425f3f3d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -642,7 +642,7 @@ struct amdgpu_nbio_funcs {
> >   void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring 
> > *ring);
> >   u32 (*get_memsize)(struct amdgpu_device *adev);
> >   void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
> > - bool use_doorbell, int doorbell_index, int 
> > doorbell_size);
> > + bool use_doorbell, int index_in_dw, int 
> > range_dw_size);
> >   void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
> >bool enable);
> >   void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> > index 1cfec06f81d4..5c8d04c353d0 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> > @@ -39,6 +39,7 @@ struct amdgpu_doorbell {
> >* can be 64-bit, so the index defined is in qword.
> >*/
> >   struct amdgpu_doorbell_index {
> > + uint32_t entry_dw_size;
> >   uint32_t kiq;
> >   uint32_t mec_ring0;
> >   uint32_t mec_ring1;
> > @@ -73,7 +74,7 @@ struct amdgpu_doorbell_index {
> >   };
> >   uint32_t max_assignment;
> >   /* Per engine SDMA doorbell size in dword */
> > - uint32_t sdma_doorbell_range;
> > + uint32_t dw_range_per_sdma_eng;
> >   };
> >
> >   typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
> > diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c 
> > b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
> > index cc967dbfd631..64bc41afd71e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
> > @@ -68,7 +68,7 @@ static u32 nbio_v6_1_get_memsize(struct amdgpu_device 
> > *adev)
> >   }
> >
> >   static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int 
> > instance,
> > - bool use_doorbell, int doorbell_index, int 
> > doorbell_size)
> > + bool use_doorbell, int index_in_dw, int range_dw_size)
> >   {
> >   u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, 
> > mmBIF_SDMA0_DOORBELL_RANGE) :
> >   SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
> > @@ -76,8 +76,8 @@ static void nbio_v6_1_sdma_doorbell_range(struct 
> > amdgpu_device *adev, int instan
> >   u32 doorbell_range = RREG32(reg);
> >
> >   if (use_doorbell) {
> > - doorbell_range = REG_SET_FIELD(doorbell_range, 
> > BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
> > - doorbell_range = REG_SET_FIELD(doorbell_range, 
> > BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size);
> > + doorbell_range = REG_SET_FIELD(doorbell_range, 
> > BIF_SDMA0_DOORBELL_RANGE, OFFSET, index_in_dw);
> > + doorbell_range = REG_SET_FIELD(doorbell_range, 
> > BIF_SDMA0_DOORBELL_RANGE, SIZE, range_dw_size);
> >   } else
> >   doorbell_range = REG_SET_FIELD(doorbell_range, 
> > BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c 
> > b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> > index 1cdb98ad2db3..28cc96b7a292 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> > @@ -67,7 +67,7 @@ static u32 nbio_v7_0_get_memsize(struct amdgpu_device 
> > *adev)
> >   }
> >
> >   static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int 
> > instance,
> > - bool use_doorbell, int doorbell_index, int 
> > doorbell_size)
> > + bool 

Re: [PATCH v2 1/2] drm/amdgpu: Fix pci platform speed and width

2019-02-05 Thread Alex Deucher
On Tue, Feb 5, 2019 at 4:38 PM Kasiviswanathan, Harish
 wrote:
>
> The new Vega series GPU cards have built-in bridges. To get the PCIe
> speed and width supported by the platform, walk the hierarchy and take the
> slowest link.
>
> Change-Id: I3196d158b0c614cbb5d7a34c793a58cb95322d32
> Signed-off-by: Harish Kasiviswanathan 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 
> +++---
>  1 file changed, 46 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index d7dddb9..fcab1fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3618,6 +3618,38 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
> return r;
>  }
>
> +static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
> + enum pci_bus_speed *speed,
> + enum pcie_link_width *width)
> +{
> +   struct pci_dev *pdev = adev->pdev;
> +   enum pci_bus_speed cur_speed;
> +   enum pcie_link_width cur_width;
> +
> +   *speed = PCI_SPEED_UNKNOWN;
> +   *width = PCIE_LNK_WIDTH_UNKNOWN;
> +
> +   while (pdev) {
> +   cur_speed = pcie_get_speed_cap(pdev);
> +   cur_width = pcie_get_width_cap(pdev);
> +
> +   if (cur_speed != PCI_SPEED_UNKNOWN) {
> +   if (*speed == PCI_SPEED_UNKNOWN)
> +   *speed = cur_speed;
> +   else if (cur_speed < *speed)
> +   *speed = cur_speed;
> +   }
> +
> +   if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
> +   if (*width == PCIE_LNK_WIDTH_UNKNOWN)
> +   *width = cur_width;
> +   else if (cur_width < *width)
> +   *width = cur_width;
> +   }
> +   pdev = pci_upstream_bridge(pdev);
> +   }
> +}
> +
>  /**
>   * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
>   *
> @@ -3630,8 +3662,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
>  static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
>  {
> struct pci_dev *pdev;
> -   enum pci_bus_speed speed_cap;
> -   enum pcie_link_width link_width;
> +   enum pci_bus_speed speed_cap, platform_speed_cap;
> +   enum pcie_link_width platform_link_width;
>
> if (amdgpu_pcie_gen_cap)
> adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
> @@ -3648,6 +3680,12 @@ static void amdgpu_device_get_pcie_info(struct 
> amdgpu_device *adev)
> return;
> }
>
> +   if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
> +   return;

I think you can drop this check, but I guess it doesn't hurt.  Either
way, series is:
Reviewed-by: Alex Deucher 

> +
> +   amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
> + &platform_link_width);
> +
> if (adev->pm.pcie_gen_mask == 0) {
> /* asic caps */
> pdev = adev->pdev;
> @@ -3673,22 +3711,20 @@ static void amdgpu_device_get_pcie_info(struct 
> amdgpu_device *adev)
> adev->pm.pcie_gen_mask |= 
> CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
> }
> /* platform caps */
> -   pdev = adev->ddev->pdev->bus->self;
> -   speed_cap = pcie_get_speed_cap(pdev);
> -   if (speed_cap == PCI_SPEED_UNKNOWN) {
> +   if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
> adev->pm.pcie_gen_mask |= 
> (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
>
> CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
> } else {
> -   if (speed_cap == PCIE_SPEED_16_0GT)
> +   if (platform_speed_cap == PCIE_SPEED_16_0GT)
> adev->pm.pcie_gen_mask |= 
> (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
>
> CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
>
> CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
>
> CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
> -   else if (speed_cap == PCIE_SPEED_8_0GT)
> +   else if (platform_speed_cap == PCIE_SPEED_8_0GT)
> adev->pm.pcie_gen_mask |= 
> (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
>
> CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
>
> CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
> -   else if (speed_cap == 

Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

2019-02-05 Thread Kuehling, Felix
On 2019-02-05 11:00 a.m., Koenig, Christian wrote:
> Ah! The initial clear of the PDs is syncing to everything!

Right. Using amdgpu_sync_resv(... AMDGPU_FENCE_OWNER_VM ...) in 
amdgpu_vm_clear_bo seems to help. But if I also change the 
amdgpu_job_submit to use AMDGPU_FENCE_OWNER_VM, I get a VM fault and CP 
hang very early in my test. Not sure what's happening there. This is 
what I changed:

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8b240f9..06c28ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -826,17 +826,18 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
  
 WARN_ON(job->ibs[0].length_dw > 64);
 r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
-AMDGPU_FENCE_OWNER_UNDEFINED, false);
+AMDGPU_FENCE_OWNER_VM, false);
 if (r)
 goto error_free;
  
-   r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED,
+   r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM,
    &fence);
 if (r)
 goto error_free;
  
 amdgpu_bo_fence(bo, fence, true);
-   dma_fence_put(fence);
+   dma_fence_put(vm->last_update);
+   vm->last_update = fence;
  
 if (bo->shadow)
 return amdgpu_vm_clear_bo(adev, vm, bo->shadow,

Basically I tried to do the synchronization exactly like 
amdgpu_vm_update_directories.

The only way this clear could cause a VM fault is if a subsequent 
PTE/PDE update happens too early and gets overwritten by the pending 
clear. But don't the clear and the next PTE/PDE update go to the same 
queue (vm->entity), and aren't they implicitly synchronized?

Answer after another hour of poring over code and history: No, sched 
entities are no longer equivalent to queues. The scheduler can itself 
load-balance VM updates across multiple SDMA queues. Sigh.

OK, so page table updates to different PTEs don't usually need to 
synchronize with each other. But how do page table updates to the same 
address get synchronized? How do you guarantee that two updates of the 
same PTE are processed by the hardware in the correct order, if 
AMDGPU_FENCE_OWNER_VM fences don't synchronize with each other?
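
For reference, the owner check at the heart of this question boils down to 
something like the sketch below. This is illustrative only, not the actual 
amdgpu_sync_resv() code, and the FENCE_OWNER_* values are stand-ins for the 
real AMDGPU_FENCE_OWNER_* macros:

#include <stdbool.h>

#define FENCE_OWNER_UNDEFINED ((void *)0ul)  /* stand-in, e.g. moves/clears */
#define FENCE_OWNER_VM        ((void *)1ul)  /* stand-in for VM page table jobs */

/* Decide whether a new job must wait on an existing fence, based on who
 * emitted that fence. Fences emitted with the same owner are skipped, which
 * is why two AMDGPU_FENCE_OWNER_VM submissions do not implicitly synchronize
 * with each other. */
static bool need_to_sync(void *fence_owner, void *job_owner)
{
	/* Fences with no specific owner (e.g. buffer moves) are always waited on. */
	if (fence_owner == FENCE_OWNER_UNDEFINED || job_owner == FENCE_OWNER_UNDEFINED)
		return true;

	/* Same owner: assume ordering comes from somewhere else (e.g. the same
	 * scheduler entity), so do not add a dependency. */
	if (fence_owner == job_owner)
		return false;

	return true;
}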


>
> Ok, that is easy to fix.

Not that easy. See above. ;)

Regards,
   Felix



>
> Christian.
>
> Am 05.02.19 um 16:49 schrieb Kuehling, Felix:
>> I saw a backtrace from the GPU scheduler when I was debugging this 
>> yesterday, but those backtraces never tell us where the command submission 
>> came from. It could be memory initialization, or some migration at 
>> buffer-validation. Basically, any command submission triggered by page table 
>> allocation that synchronizes with the PD reservation object is a problem.
>>
>> Regards,
>> Felix
>>
>> -Original Message-
>> From: Christian König 
>> Sent: Tuesday, February 05, 2019 10:40 AM
>> To: Kuehling, Felix ; Koenig, Christian 
>> ; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> Am 05.02.19 um 16:20 schrieb Kuehling, Felix:
> This may cause regressions in KFD if PT BO allocation can trigger
> eviction fences.
 I don't think that can happen, but need to double check as well.

 Otherwise allocating page tables could try to evict other page tables from 
 the same process and that seriously doesn't make much sense.
>>> Eviction fences don't cause actual memory evictions. They are the result of 
>>> memory evictions. They cause KFD processes to be preempted in order to 
>>> allow the fence to signal so memory can be evicted. The problem is that a 
>>> page table allocation waits for the fences on the PD reservation, which 
>>> looks like an eviction to KFD and triggers an unnecessary preemption. There 
>>> is no actual memory eviction happening here.
>> Yeah, but where is that actually coming from?
>>
>> It sounds like we still unnecessary synchronize page table updates somewhere.
>>
>> Do you have a call chain?
>>
>> Regards,
>> Christian.
>>
>>> Regards,
>>>  Felix
>>>
>>> -Original Message-
>>> From: Christian König 
>>> Sent: Tuesday, February 05, 2019 6:37 AM
>>> To: Kuehling, Felix ;
>>> amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
>>>
 This may cause regressions in KFD if PT BO allocation can trigger
 eviction fences.
>>> I don't think that can happen, but need to double check as well.
>>>
>>> Otherwise allocating page tables could try to evict other page tables from 
>>> the same process and that seriously doesn't make much sense.
>>>
 Do you know whether PT BO allocation would wait on the page-directory
 reservation object without the sync-object API anywhere?
>>> For inside the kernel I can't think of any relevant use case, except for 
>>> the eviction call path and there it is 

[pull] amdgpu drm-fixes-5.0

2019-02-05 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 5.0:
- Fix missing freesync properties on eDP
- Fix locking in pasid mgr
- Fix clang warning in kfd
- DC/powerplay fix
- Fix reported rev ids on raven
- Doorbell fix for vega20

The following changes since commit 6e11ea9de9576a644045ffdc2067c09bc2012eda:

  drm/amdgpu: Transfer fences to dmabuf importer (2019-01-30 12:52:44 -0500)

are available in the Git repository at:

  git://people.freedesktop.org/~agd5f/linux drm-fixes-5.0

for you to fetch changes up to 7fad8da1ae23171de0ea3cdb2c620f71eb2ab983:

  drm/amd/display: Attach VRR properties for eDP connectors (2019-02-05 
18:10:28 -0500)


Huang Rui (1):
  drm/amdgpu: fix the incorrect external id for raven series

Jay Cornwall (1):
  drm/amdgpu: Implement doorbell self-ring for NBIO 7.4

Nathan Chancellor (1):
  drm/amdkfd: Fix if preprocessor statement above 
kfd_fill_iolink_info_for_cpu

Nicholas Kazlauskas (1):
  drm/amd/display: Attach VRR properties for eDP connectors

Philip Yang (1):
  drm/amdgpu: use spin_lock_irqsave to protect vm_manager.pasid_idr

Roman Li (1):
  drm/amd/display: Fix fclk idle state

 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  5 +++--
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c| 13 +
 drivers/gpu/drm/amd/amdgpu/soc15.c|  6 --
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c |  2 +-
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  3 ++-
 drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c  | 10 +-
 6 files changed, 32 insertions(+), 7 deletions(-)


[PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v7

2019-02-05 Thread Yang, Philip
Use the HMM helper function hmm_vma_fault() to get the physical pages backing
the userptr and to start tracking CPU page table updates on those pages. Then
use hmm_vma_range_done() to check whether those pages were updated before
amdgpu_cs_submit for gfx, or before user queues are resumed for kfd.

If the userptr pages were updated, amdgpu_cs_ioctl restarts from scratch for
gfx; for kfd, the restore worker is rescheduled to retry.

HMM simplifies the concurrent CPU page table update check, so remove the
guptasklock, mmu_invalidations, and last_set_pages fields from the
amdgpu_ttm_tt struct.

HMM does not pin the pages (i.e. it does not raise the page refcount), so
remove the related operations like release_pages(), put_page(), and
mark_page_dirty().
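
A minimal sketch of that flow, using the hmm_vma_fault()/hmm_vma_range_done()
API named above (vma, start, npages, pfns and r are placeholders here, and the
real amdgpu glue in amdgpu_ttm.c/amdgpu_mn.c differs in detail):

/* 1. Snapshot the CPU page tables and fault the pages in if needed. */
struct hmm_range range = {
	.vma       = vma,                        /* VMA backing the userptr  */
	.start     = start,                      /* first user VA            */
	.end       = start + npages * PAGE_SIZE,
	.pfns      = pfns,                       /* caller-allocated array   */
	.flags     = hmm_range_flags,            /* defined in amdgpu_mn.c   */
	.values    = hmm_range_values,
	.pfn_shift = PAGE_SHIFT,
};

r = hmm_vma_fault(&range, true);
if (r)
	return r;

/* 2. Translate range.pfns into pages and build the command submission. */

/* 3. Right before committing (amdgpu_cs_submit for gfx, queue resume for
 *    kfd), check that no CPU page table invalidation raced with us; if one
 *    did, the caller restarts as described above. */
if (!hmm_vma_range_done(&range))
	return -EAGAIN;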

Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67
Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   1 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  95 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 158 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c|  25 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 178 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |   3 +-
 9 files changed, 198 insertions(+), 282 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 0b31a1859023..0e1711a75b68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -61,7 +61,6 @@ struct kgd_mem {
 
atomic_t invalid;
struct amdkfd_process_info *process_info;
-   struct page **user_pages;
 
struct amdgpu_sync sync;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d7b10d79f1de..ae2d838d31ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct 
mm_struct *mm,
goto out;
}
 
-   /* If no restore worker is running concurrently, user_pages
-* should not be allocated
-*/
-   WARN(mem->user_pages, "Leaking user_pages array");
-
-   mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
-  sizeof(struct page *),
-  GFP_KERNEL | __GFP_ZERO);
-   if (!mem->user_pages) {
-   pr_err("%s: Failed to allocate pages array\n", __func__);
-   ret = -ENOMEM;
-   goto unregister_out;
-   }
-
-   ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+   ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
-   goto free_out;
+   goto unregister_out;
}
 
-   amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
-
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__);
@@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct 
mm_struct *mm,
amdgpu_bo_unreserve(bo);
 
 release_out:
-   if (ret)
-   release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-free_out:
-   kvfree(mem->user_pages);
-   mem->user_pages = NULL;
+   amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
@@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = >tbo;
ctx->kfd_bo.tv.num_shared = 1;
-   ctx->kfd_bo.user_pages = NULL;
list_add(>kfd_bo.tv.head, >list);
 
amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]);
@@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = >tbo;
ctx->kfd_bo.tv.num_shared = 1;
-   ctx->kfd_bo.user_pages = NULL;
list_add(>kfd_bo.tv.head, >list);
 
i = 0;
@@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(_list_entry->head);
mutex_unlock(_info->lock);
 
-   /* Free user pages if necessary */
-   if (mem->user_pages) {
-   pr_debug("%s: Freeing user_pages array\n", __func__);
-   if (mem->user_pages[0])
-   release_pages(mem->user_pages,
-   mem->bo->tbo.ttm->num_pages);
-   kvfree(mem->user_pages);
-   }
-
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, );
if (unlikely(ret))
return ret;
@@ -1855,25 +1824,11 @@ static int update_invalid_user_pages(struct 

Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6

2019-02-05 Thread Yang, Philip
Hi Christian,

I will submit new patch for review, my comments embedded inline below.

Thanks,
Philip

On 2019-02-05 1:09 p.m., Koenig, Christian wrote:
> Am 05.02.19 um 18:25 schrieb Yang, Philip:
>> [SNIP]+
 +    if (r == -ERESTARTSYS) {
 +    if (!--tries) {
 +    DRM_ERROR("Possible deadlock? Retry too many times\n");
 +    return -EDEADLK;
 +    }
 +    goto restart;
>>> You really need to restart the IOCTL to potentially handle signals.
>>>
>>> But since the calling code correctly handles this already you can just
>>> drop this change as far as I can see.
>>>
>> I agree that we should return -ERESTARTSYS to upper layer to handle signals.
>>
>> But I do not find upper layers handle -ERESTARTSYS in the entire calling
>> path, ksys_ioctl -> do_vfs_ioctl -> amdgpu_drm_ioctl -> drm->ioctl ->
>> drm_ioctl_kernel -> amdgpu_cs_ioctl. The error code returns to
>> application. I confirm it, libdrm userptr test application calling
>> amdgpu_cs_ioctl return code is -512, which is -ERESTARTSYS.
>>
>> So application should handle -ERESTARTSYS to restart the ioctl, but
>> libdrm userptr test application doesn't handle this. This causes the
>> test failed.
> 
> This is a bug in the test cases then.
> 
> -ERESTARTSYS can happen at any time during interruptible waiting and it
> is mandatory for the upper layer to handle it correctly.
> 
-ERESTARTSYS can be returned only when a signal is pending; the signal 
handling path translates ERESTARTSYS to EINTR, and drmIoctl in libdrm does 
handle EINTR and restarts the ioctl. The test cases are OK.

The driver failure path should not return ERESTARTSYS to user space. In the 
new patch, I change amdgpu_cs_submit to return -EAGAIN if the userptr was 
updated, and amdgpu_cs_ioctl redoes the ioctl only if the error code is 
-EAGAIN. The ERESTARTSYS error code is still returned to user space for 
signal handling, as before.
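
For reference, the user-space side relies on the usual retry loop around the
ioctl; a sketch of that pattern (not the actual drmIoctl() source):

#include <errno.h>
#include <sys/ioctl.h>

/* Reissue the ioctl while the kernel reports EINTR (the user-visible form of
 * -ERESTARTSYS after a signal) or EAGAIN. */
static int retry_ioctl(int fd, unsigned long request, void *arg)
{
	int ret;

	do {
		ret = ioctl(fd, request, arg);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));

	return ret;
}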

>>
>> Below are details of userptr path difference. For the previous path,
>> libdrm test always goes to step 2, step 3 never trigger. So it never
>> return -ERESTARTSYS, but in theory, this could happen.
>>
>> For HMM path, the test always goes to step 3, we have to handle this
>> case inside amdgpu_cs_ioctl. Maybe I can change amdgpu_cs_submit to
>> return -EBUSY, then restart the ioctl inside amdgpu_cs_ioctl. I will
>> submit new patch.
> 
> Clearly a NAK, this won't work correctly.
> 
I don't understand your concern. Could you explain the reason?

> Christian.
> 
>>
>> The previous userptr path:
>> 1. gem_userptr_ioctl to register userptr
>> 2. amdgpu_cs_parser_bos, check if userptr is invalidated, then update
>> userptr
>> 3. amdgpu_cs_submit, hold p->mn lock, check if userptr is invalidated,
>> return -ERESTARTSYS
>>
>> The new HMM userptr path:
>> 1. gem_userptr_ioctl to register userptr
>> 2. amdgpu_cs_parser_bos, start HMM to track userptr update
>> 3. amdgpu_cs_submit, hold p->mn lock, check HMM if userptr is
>> invalidated, return -ERESTARTSYS
>>
>>
 +    }
 +
     return r;
     }
 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
 index d21dd2f369da..555285e329ed 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
 @@ -329,26 +329,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
 *dev, void *data,
     r = amdgpu_bo_reserve(bo, true);
     if (r)
 -    goto free_pages;
 +    goto user_pages_done;
     amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
     r = ttm_bo_validate(>tbo, >placement, );
     amdgpu_bo_unreserve(bo);
     if (r)
 -    goto free_pages;
 +    goto user_pages_done;
     }
     r = drm_gem_handle_create(filp, gobj, );
 -    /* drop reference from allocate - handle holds it now */
 -    drm_gem_object_put_unlocked(gobj);
     if (r)
 -    return r;
 +    goto user_pages_done;
     args->handle = handle;
 -    return 0;
 -free_pages:
 -    release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
 +user_pages_done:
 +    if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
 +    amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
     release_object:
     drm_gem_object_put_unlocked(gobj);
 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
 index e356867d2308..fa2516016c43 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
 @@ -222,8 +222,6 @@ static void amdgpu_mn_invalidate_node(struct
 amdgpu_mn_node *node,
     true, false, MAX_SCHEDULE_TIMEOUT);
     if (r <= 0)
     DRM_ERROR("(%ld) failed to wait for user bo\n", r);
 -
 -    amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
     }
     

Re: [PATCH 1/5] drm/amdkfd: Improve doorbell variable names

2019-02-05 Thread Kuehling, Felix
The headline should start with "drm/amdgpu". This change is not KFD 
specific.

I think Alex should review this change.

Regards,
   Felix

On 2019-02-05 3:31 p.m., Zhao, Yong wrote:
> Indicate that the doorbell offset and range are in dwords.
>
> Change-Id: Ib0f2564ffa7b1940ffb8725cdc03f662184f5436
> Signed-off-by: Yong Zhao 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 3 ++-
>   drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c   | 6 +++---
>   drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c   | 6 +++---
>   drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 6 +++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 3 ++-
>   drivers/gpu/drm/amd/amdgpu/soc15.c   | 2 +-
>   drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 6 +-
>   drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 6 +-
>   9 files changed, 25 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d67f8b1dfe80..6230425f3f3d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -642,7 +642,7 @@ struct amdgpu_nbio_funcs {
>   void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
>   u32 (*get_memsize)(struct amdgpu_device *adev);
>   void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
> - bool use_doorbell, int doorbell_index, int 
> doorbell_size);
> + bool use_doorbell, int index_in_dw, int range_dw_size);
>   void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
>bool enable);
>   void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> index 1cfec06f81d4..5c8d04c353d0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> @@ -39,6 +39,7 @@ struct amdgpu_doorbell {
>* can be 64-bit, so the index defined is in qword.
>*/
>   struct amdgpu_doorbell_index {
> + uint32_t entry_dw_size;
>   uint32_t kiq;
>   uint32_t mec_ring0;
>   uint32_t mec_ring1;
> @@ -73,7 +74,7 @@ struct amdgpu_doorbell_index {
>   };
>   uint32_t max_assignment;
>   /* Per engine SDMA doorbell size in dword */
> - uint32_t sdma_doorbell_range;
> + uint32_t dw_range_per_sdma_eng;
>   };
>   
>   typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
> diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c 
> b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
> index cc967dbfd631..64bc41afd71e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
> @@ -68,7 +68,7 @@ static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev)
>   }
>   
>   static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int 
> instance,
> - bool use_doorbell, int doorbell_index, int 
> doorbell_size)
> + bool use_doorbell, int index_in_dw, int range_dw_size)
>   {
>   u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, 
> mmBIF_SDMA0_DOORBELL_RANGE) :
>   SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
> @@ -76,8 +76,8 @@ static void nbio_v6_1_sdma_doorbell_range(struct 
> amdgpu_device *adev, int instan
>   u32 doorbell_range = RREG32(reg);
>   
>   if (use_doorbell) {
> - doorbell_range = REG_SET_FIELD(doorbell_range, 
> BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
> - doorbell_range = REG_SET_FIELD(doorbell_range, 
> BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size);
> + doorbell_range = REG_SET_FIELD(doorbell_range, 
> BIF_SDMA0_DOORBELL_RANGE, OFFSET, index_in_dw);
> + doorbell_range = REG_SET_FIELD(doorbell_range, 
> BIF_SDMA0_DOORBELL_RANGE, SIZE, range_dw_size);
>   } else
>   doorbell_range = REG_SET_FIELD(doorbell_range, 
> BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c 
> b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> index 1cdb98ad2db3..28cc96b7a292 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> @@ -67,7 +67,7 @@ static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
>   }
>   
>   static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int 
> instance,
> - bool use_doorbell, int doorbell_index, int 
> doorbell_size)
> + bool use_doorbell, int index_in_dw, int range_dw_size)
>   {
>   u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, 
> mmBIF_SDMA0_DOORBELL_RANGE) :
>   SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
> @@ -75,8 +75,8 @@ static void nbio_v7_0_sdma_doorbell_range(struct 
> amdgpu_device *adev, int instan
>   u32 doorbell_range = RREG32(reg);
>   
>   

[PATCH v2 1/2] drm/amdgpu: Fix pci platform speed and width

2019-02-05 Thread Kasiviswanathan, Harish
The new Vega series GPU cards have built-in bridges. To get the PCIe
speed and width supported by the platform, walk the hierarchy and take the
slowest link.

Change-Id: I3196d158b0c614cbb5d7a34c793a58cb95322d32
Signed-off-by: Harish Kasiviswanathan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 +++---
 1 file changed, 46 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d7dddb9..fcab1fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3618,6 +3618,38 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
return r;
 }
 
+static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+   struct pci_dev *pdev = adev->pdev;
+   enum pci_bus_speed cur_speed;
+   enum pcie_link_width cur_width;
+
+   *speed = PCI_SPEED_UNKNOWN;
+   *width = PCIE_LNK_WIDTH_UNKNOWN;
+
+   while (pdev) {
+   cur_speed = pcie_get_speed_cap(pdev);
+   cur_width = pcie_get_width_cap(pdev);
+
+   if (cur_speed != PCI_SPEED_UNKNOWN) {
+   if (*speed == PCI_SPEED_UNKNOWN)
+   *speed = cur_speed;
+   else if (cur_speed < *speed)
+   *speed = cur_speed;
+   }
+
+   if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
+   if (*width == PCIE_LNK_WIDTH_UNKNOWN)
+   *width = cur_width;
+   else if (cur_width < *width)
+   *width = cur_width;
+   }
+   pdev = pci_upstream_bridge(pdev);
+   }
+}
+
 /**
  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
  *
@@ -3630,8 +3662,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 {
struct pci_dev *pdev;
-   enum pci_bus_speed speed_cap;
-   enum pcie_link_width link_width;
+   enum pci_bus_speed speed_cap, platform_speed_cap;
+   enum pcie_link_width platform_link_width;
 
if (amdgpu_pcie_gen_cap)
adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@@ -3648,6 +3680,12 @@ static void amdgpu_device_get_pcie_info(struct 
amdgpu_device *adev)
return;
}
 
+   if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
+   return;
+
+   amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
+ &platform_link_width);
+
if (adev->pm.pcie_gen_mask == 0) {
/* asic caps */
pdev = adev->pdev;
@@ -3673,22 +3711,20 @@ static void amdgpu_device_get_pcie_info(struct 
amdgpu_device *adev)
adev->pm.pcie_gen_mask |= 
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
}
/* platform caps */
-   pdev = adev->ddev->pdev->bus->self;
-   speed_cap = pcie_get_speed_cap(pdev);
-   if (speed_cap == PCI_SPEED_UNKNOWN) {
+   if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
adev->pm.pcie_gen_mask |= 
(CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
   
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
} else {
-   if (speed_cap == PCIE_SPEED_16_0GT)
+   if (platform_speed_cap == PCIE_SPEED_16_0GT)
adev->pm.pcie_gen_mask |= 
(CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
   
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
   
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
   
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
-   else if (speed_cap == PCIE_SPEED_8_0GT)
+   else if (platform_speed_cap == PCIE_SPEED_8_0GT)
adev->pm.pcie_gen_mask |= 
(CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
   
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
   
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
-   else if (speed_cap == PCIE_SPEED_5_0GT)
+   else if (platform_speed_cap == PCIE_SPEED_5_0GT)
adev->pm.pcie_gen_mask |= 
(CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
   
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
else
@@ -3697,12 +3733,10 @@ static void amdgpu_device_get_pcie_info(struct 
amdgpu_device *adev)
  

[PATCH v2 2/2] drm/amd/powerplay: add override pcie parameters for Vega20 (v2)

2019-02-05 Thread Kasiviswanathan, Harish
v2: Fix SMU message format
Send override message after SMU enable features

Change-Id: Ib880c83bc7aa12be370cf6619acfe66e12664c9c
Signed-off-by: Harish Kasiviswanathan 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 45 +-
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index da022ca..e1b1656 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -771,40 +771,49 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr)
return 0;
 }
 
+/*
+ * Override PCIe link speed and link width for DPM Level 1. PPTable entries
+ * reflect the ASIC capabilities and not the system capabilities. For e.g.
+ * Vega20 board in a PCI Gen3 system. In this case, when SMU's tries to switch
+ * to DPM1, it fails as system doesn't support Gen4.
+ */
 static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
-   uint32_t pcie_speed = 0, pcie_width = 0, pcie_arg;
+   uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg;
int ret;
 
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
-   pcie_speed = 16;
+   pcie_gen = 3;
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
-   pcie_speed = 8;
+   pcie_gen = 2;
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
-   pcie_speed = 5;
+   pcie_gen = 1;
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1)
-   pcie_speed = 2;
+   pcie_gen = 0;
 
if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32)
-   pcie_width = 32;
+   pcie_width = 7;
else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
-   pcie_width = 16;
+   pcie_width = 6;
else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12)
-   pcie_width = 12;
+   pcie_width = 5;
else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8)
-   pcie_width = 8;
-   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4)
pcie_width = 4;
+   else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4)
+   pcie_width = 3;
else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2)
pcie_width = 2;
else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1)
pcie_width = 1;
 
-   pcie_arg = pcie_width | (pcie_speed << 8);
-
+   /* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1
+* Bit 15:8:  PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4
+* Bit 7:0:   PCIE lane width, 1 to 7 corresponds is x1 to x32
+*/
+   smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width;
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
-   PPSMC_MSG_OverridePcieParameters, pcie_arg);
+   PPSMC_MSG_OverridePcieParameters, smu_pcie_arg);
PP_ASSERT_WITH_CODE(!ret,
"[OverridePcieParameters] Attempt to override pcie params 
failed!",
return ret);
@@ -1611,11 +1620,6 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr 
*hwmgr)
"[EnableDPMTasks] Failed to initialize SMC table!",
return result);
 
-   result = vega20_override_pcie_parameters(hwmgr);
-   PP_ASSERT_WITH_CODE(!result,
-   "[EnableDPMTasks] Failed to override pcie parameters!",
-   return result);
-
result = vega20_run_btc(hwmgr);
PP_ASSERT_WITH_CODE(!result,
"[EnableDPMTasks] Failed to run btc!",
@@ -1631,6 +1635,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr 
*hwmgr)
"[EnableDPMTasks] Failed to enable all smu features!",
return result);
 
+   result = vega20_override_pcie_parameters(hwmgr);
+   PP_ASSERT_WITH_CODE(!result,
+   "[EnableDPMTasks] Failed to override pcie parameters!",
+   return result);
+
result = vega20_notify_smc_display_change(hwmgr);
PP_ASSERT_WITH_CODE(!result,
"[EnableDPMTasks] Failed to notify smc display change!",
-- 
2.7.4
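
As a worked example of the encoding added in this patch (illustrative values
only: a platform capped at PCIe Gen3 x16 gives pcie_gen = 2 and pcie_width = 6
for LCLK DPM level 1):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t dpm_level  = 1;   /* bits 31:16 */
	uint32_t pcie_gen   = 2;   /* bits 15:8, 0..3 => Gen1..Gen4 */
	uint32_t pcie_width = 6;   /* bits 7:0,  1..7 => x1..x32 */
	uint32_t smu_pcie_arg = (dpm_level << 16) | (pcie_gen << 8) | pcie_width;

	printf("smu_pcie_arg = 0x%08x\n", (unsigned int)smu_pcie_arg); /* 0x00010206 */
	return 0;
}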



[PATCH 4/5] drm/amdkfd: Fix bugs regarding CP user queue doorbells mask on SOC15

2019-02-05 Thread Zhao, Yong
Reserved doorbells for SDMA, IH, and VCN were not properly masked out
when allocating doorbells for CP user queues. This patch fixes that.
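
In other words, the reservation check changes from a fixed mask/value match
to a contiguous range test; a sketch of the before/after logic (not the exact
KFD code):

/* Old scheme: only doorbells matching the mask/value pair were skipped. */
static bool reserved_old(unsigned int idx)
{
	return (idx & 0x1e0) == 0x0e0;   /* 0x0e0-0x0ff, 0x2e0-0x2ff, ... */
}

/* New scheme: skip everything from the first SDMA doorbell up to the last
 * non-CP doorbell (names follow the fields added in this patch). */
static bool reserved_new(unsigned int idx,
			 unsigned int reserved_doorbells_start,
			 unsigned int reserved_doorbells_end)
{
	return idx >= reserved_doorbells_start && idx <= reserved_doorbells_end;
}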

Change-Id: I670adfc3fd7725d2ed0bd9665cb7f69f8b9023c2
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 17 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h  |  4 
 drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c  |  3 +++
 drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c  |  3 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  9 
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 21 ++-
 .../gpu/drm/amd/include/kgd_kfd_interface.h   | 19 ++---
 7 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index e957e42c539a..ee8527701731 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -196,11 +196,20 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
gpu_resources.sdma_doorbell[1][i+1] =
adev->doorbell_index.sdma_engine[1] + 0x200 + 
(i >> 1);
}
-   /* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for
-* SDMA, IH and VCN. So don't use them for the CP.
+
+   /* Because of the setting in registers like
+* SDMA0_DOORBELL_RANGE etc., BIF statically uses the
+* lower 12 bits of doorbell address for routing. In
+* order to route the CP queue doorbells to CP engine,
+* the doorbells allocated to CP queues have to be
+* outside the range set for SDMA, VCN, and IH blocks
+* Prior to SOC15, all queues use queue ID to
+* determine doorbells.
 */
-   gpu_resources.reserved_doorbell_mask = 0x1e0;
-   gpu_resources.reserved_doorbell_val  = 0x0e0;
+   gpu_resources.reserved_doorbells_start =
+   adev->doorbell_index.sdma_engine[0];
+   gpu_resources.reserved_doorbells_end =
+   adev->doorbell_index.last_non_cp;
 
kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 59c41841cbce..74b8e2bfabd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -70,6 +70,7 @@ struct amdgpu_doorbell_index {
uint32_t vce_ring6_7;
} uvd_vce;
};
+   uint32_t last_non_cp;
uint32_t max_assignment;
uint32_t last_idx;
/* Per engine SDMA doorbell size in dword */
@@ -141,6 +142,7 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D,
AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E,
AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F,
+   AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = 
AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT= 0x18F,
AMDGPU_VEGA20_DOORBELL_INVALID   = 0x
 } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
@@ -216,6 +218,8 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE,
AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF,
 
+   AMDGPU_DOORBELL64_LAST_NON_CP = 
AMDGPU_DOORBELL64_VCE_RING6_7,
+
AMDGPU_DOORBELL64_MAX_ASSIGNMENT  = 0xFF,
AMDGPU_DOORBELL64_INVALID = 0x
 } AMDGPU_DOORBELL64_ASSIGNMENT;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index 65214c7b0b20..76166c0ec509 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -80,6 +80,9 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.uvd_vce.vce_ring2_3 = 
AMDGPU_DOORBELL64_VCE_RING2_3;
adev->doorbell_index.uvd_vce.vce_ring4_5 = 
AMDGPU_DOORBELL64_VCE_RING4_5;
adev->doorbell_index.uvd_vce.vce_ring6_7 = 
AMDGPU_DOORBELL64_VCE_RING6_7;
+
+   adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP;
+
/* In unit of dword doorbell */
adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT 
<< 1;
adev->doorbell_index.dw_range_per_sdma_eng =
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index a388d306391a..10df2fed5a99 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -84,6 +84,9 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)

[PATCH 5/5] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources

2019-02-05 Thread Zhao, Yong
We can directly calculate the SDMA doorbell indices in the process doorbell
pages from the doorbell_index structure in amdgpu_device, so there is no need
to cache them in kgd2kfd_shared_resources any more. This results in more
portable code.
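
A sketch of the per-queue index computation this enables on the KFD side,
derived from the sdma_doorbell[][] assignments removed below (the actual code
in kfd_device_queue_manager.c may differ in detail):

/* q is the queue id within the given SDMA engine. Even queues use the low
 * half of the doorbell page, odd queues the copy at +0x200, matching the old
 * sdma_doorbell[engine][i] / [i+1] pairs. */
static unsigned int sdma_queue_doorbell_idx(struct amdgpu_device *adev,
					    unsigned int engine, unsigned int q)
{
	unsigned int base = adev->doorbell_index.sdma_engine[engine];

	return base + (q >> 1) + ((q & 1) ? 0x200 : 0);
}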

Change-Id: Ic657799856ed0256f36b01e502ef0cab263b1f49
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 55 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 --
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 +-
 3 files changed, 31 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index ee8527701731..e62c3703169a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-   int i, n;
+   int i;
int last_valid_bit;
 
if (adev->kfd.dev) {
@@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.gpuvm_size = min(adev->vm_manager.max_pfn
  << AMDGPU_GPU_PAGE_SHIFT,
  AMDGPU_GMC_HOLE_START),
-   .drm_render_minor = adev->ddev->render->index
+   .drm_render_minor = adev->ddev->render->index,
+   .sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
+
};
 
/* this is going to have a few of the MSBs set that we need to
@@ -172,45 +174,22 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
 
-   if (adev->asic_type < CHIP_VEGA10) {
-   kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
-   return;
-   }
-
-   n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
-
-   for (i = 0; i < n; i += 2) {
-   /* On SOC15 the BIF is involved in routing
-* doorbells using the low 12 bits of the
-* address. Communicate the assignments to
-* KFD. KFD uses two doorbell pages per
-* process in case of 64-bit doorbells so we
-* can use each doorbell assignment twice.
+   if (adev->asic_type >= CHIP_VEGA10) {
+   /* Because of the setting in registers like
+* SDMA0_DOORBELL_RANGE etc., BIF statically uses the
+* lower 12 bits of doorbell address for routing. In
+* order to route the CP queue doorbells to CP engine,
+* the doorbells allocated to CP queues have to be
+* outside the range set for SDMA, VCN, and IH blocks
+* Prior to SOC15, all queues use queue ID to
+* determine doorbells.
 */
-   gpu_resources.sdma_doorbell[0][i] =
-   adev->doorbell_index.sdma_engine[0] + (i >> 1);
-   gpu_resources.sdma_doorbell[0][i+1] =
-   adev->doorbell_index.sdma_engine[0] + 0x200 + 
(i >> 1);
-   gpu_resources.sdma_doorbell[1][i] =
-   adev->doorbell_index.sdma_engine[1] + (i >> 1);
-   gpu_resources.sdma_doorbell[1][i+1] =
-   adev->doorbell_index.sdma_engine[1] + 0x200 + 
(i >> 1);
+   gpu_resources.reserved_doorbells_start =
+   adev->doorbell_index.sdma_engine[0];
+   gpu_resources.reserved_doorbells_end =
+   adev->doorbell_index.last_non_cp;
}
 
-   /* Because of the setting in registers like
-* SDMA0_DOORBELL_RANGE etc., BIF statically uses the
-* lower 12 bits of doorbell address for routing. In
-* order to route the CP queue doorbells to CP engine,
-* the doorbells allocated to CP queues have to be
-* outside the range set for SDMA, VCN, and IH blocks
-* Prior to SOC15, all queues use queue ID to
-* determine doorbells.
-*/
-   gpu_resources.reserved_doorbells_start =
-   adev->doorbell_index.sdma_engine[0];
-   gpu_resources.reserved_doorbells_end =
-   adev->doorbell_index.last_non_cp;
-
kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
}
 }
diff --git 

[PATCH 2/5] drm/amdgpu: Fix a bug in setting CP_MEC_DOORBELL_RANGE_UPPER on SOC15

2019-02-05 Thread Zhao, Yong
Because CP can use all doorbells outside the ones reserved for SDMA, IH,
and VCN, the value set to CP_MEC_DOORBELL_RANGE_UPPER should be the
maximum doorbell index possible in a page.
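
For a concrete number, this is the computation the patch introduces, assuming
a 4 KiB doorbell page and 2-dword (64-bit) doorbell entries:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t page_size     = 4096;   /* PAGE_SIZE assumed to be 4 KiB */
	uint32_t entry_dw_size = 2;      /* doorbell_index.entry_dw_size  */
	uint32_t last_idx = page_size / (sizeof(uint32_t) * entry_dw_size) - 1;

	/* 4096 / (4 * 2) - 1 = 511, i.e. the whole page of 64-bit doorbells. */
	printf("last_idx = %u\n", (unsigned int)last_idx);
	return 0;
}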

Change-Id: I402a56ce9a80e6c2ed2f96be431ae71ca88e73a4
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 3 +++
 drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 5c8d04c353d0..90eca63605ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -73,6 +73,7 @@ struct amdgpu_doorbell_index {
} uvd_vce;
};
uint32_t max_assignment;
+   uint32_t last_idx;
/* Per engine SDMA doorbell size in dword */
uint32_t dw_range_per_sdma_eng;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 262ee3cf6f1c..0278e3ab6b94 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2998,7 +2998,7 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring 
*ring)
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
(adev->doorbell_index.kiq * 2) << 2);
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
-   (adev->doorbell_index.userqueue_end * 
2) << 2);
+   (adev->doorbell_index.last_idx * 2) << 2);
}
 
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index d2409df2dde9..9eb8c9209231 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -88,5 +88,8 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev)
(adev->doorbell_index.sdma_engine[1]
- adev->doorbell_index.sdma_engine[0])
* adev->doorbell_index.entry_dw_size;
+
+   adev->doorbell_index.last_idx = PAGE_SIZE
+   / (sizeof(uint32_t) * adev->doorbell_index.entry_dw_size) - 1;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index b28c5999d8f0..aa8c7699c689 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -91,5 +91,8 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)
(adev->doorbell_index.sdma_engine[1]
- adev->doorbell_index.sdma_engine[0])
* adev->doorbell_index.entry_dw_size;
+
+   adev->doorbell_index.last_idx = PAGE_SIZE
+   / (sizeof(uint32_t) * adev->doorbell_index.entry_dw_size) - 1;
 }
 
-- 
2.17.1



[PATCH 3/5] drm/amdgpu: Delete user queue doorbell variables

2019-02-05 Thread Zhao, Yong
They are no longer used, so delete them to avoid confusion.

Change-Id: I3cf23fe7110ff88f53c0c279b2b4ec8d1a53b87c
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 8 
 drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 2 --
 drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 2 --
 3 files changed, 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 90eca63605ea..59c41841cbce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -49,8 +49,6 @@ struct amdgpu_doorbell_index {
uint32_t mec_ring5;
uint32_t mec_ring6;
uint32_t mec_ring7;
-   uint32_t userqueue_start;
-   uint32_t userqueue_end;
uint32_t gfx_ring0;
uint32_t sdma_engine[8];
uint32_t ih;
@@ -113,8 +111,6 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
AMDGPU_VEGA20_DOORBELL_MEC_RING5   = 0x008,
AMDGPU_VEGA20_DOORBELL_MEC_RING6   = 0x009,
AMDGPU_VEGA20_DOORBELL_MEC_RING7   = 0x00A,
-   AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B,
-   AMDGPU_VEGA20_DOORBELL_USERQUEUE_END   = 0x08A,
AMDGPU_VEGA20_DOORBELL_GFX_RING0   = 0x08B,
/* SDMA:256~335*/
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0= 0x100,
@@ -178,10 +174,6 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_MEC_RING6   = 0x09,
AMDGPU_DOORBELL64_MEC_RING7   = 0x0a,
 
-   /* User queue doorbell range (128 doorbells) */
-   AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b,
-   AMDGPU_DOORBELL64_USERQUEUE_END   = 0x8a,
-
/* Graphics engine */
AMDGPU_DOORBELL64_GFX_RING0   = 0x8b,
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index 9eb8c9209231..65214c7b0b20 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -68,8 +68,6 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.mec_ring5 = AMDGPU_DOORBELL64_MEC_RING5;
adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL64_MEC_RING6;
adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL64_MEC_RING7;
-   adev->doorbell_index.userqueue_start = 
AMDGPU_DOORBELL64_USERQUEUE_START;
-   adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL64_USERQUEUE_END;
adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL64_GFX_RING0;
adev->doorbell_index.sdma_engine[0] = AMDGPU_DOORBELL64_sDMA_ENGINE0;
adev->doorbell_index.sdma_engine[1] = AMDGPU_DOORBELL64_sDMA_ENGINE1;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index aa8c7699c689..a388d306391a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -66,8 +66,6 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.mec_ring5 = AMDGPU_VEGA20_DOORBELL_MEC_RING5;
adev->doorbell_index.mec_ring6 = AMDGPU_VEGA20_DOORBELL_MEC_RING6;
adev->doorbell_index.mec_ring7 = AMDGPU_VEGA20_DOORBELL_MEC_RING7;
-   adev->doorbell_index.userqueue_start = 
AMDGPU_VEGA20_DOORBELL_USERQUEUE_START;
-   adev->doorbell_index.userqueue_end = 
AMDGPU_VEGA20_DOORBELL_USERQUEUE_END;
adev->doorbell_index.gfx_ring0 = AMDGPU_VEGA20_DOORBELL_GFX_RING0;
adev->doorbell_index.sdma_engine[0] = 
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0;
adev->doorbell_index.sdma_engine[1] = 
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1;
-- 
2.17.1



[PATCH 1/5] drm/amdkfd: Improve doorbell variable names

2019-02-05 Thread Zhao, Yong
Indicate that the doorbell offset and range are in dwords.

Change-Id: Ib0f2564ffa7b1940ffb8725cdc03f662184f5436
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 3 ++-
 drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c   | 6 +++---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c   | 6 +++---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 6 +++---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 3 ++-
 drivers/gpu/drm/amd/amdgpu/soc15.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 6 +-
 drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 6 +-
 9 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d67f8b1dfe80..6230425f3f3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -642,7 +642,7 @@ struct amdgpu_nbio_funcs {
void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
u32 (*get_memsize)(struct amdgpu_device *adev);
void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
-   bool use_doorbell, int doorbell_index, int 
doorbell_size);
+   bool use_doorbell, int index_in_dw, int range_dw_size);
void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
 bool enable);
void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 1cfec06f81d4..5c8d04c353d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -39,6 +39,7 @@ struct amdgpu_doorbell {
  * can be 64-bit, so the index defined is in qword.
  */
 struct amdgpu_doorbell_index {
+   uint32_t entry_dw_size;
uint32_t kiq;
uint32_t mec_ring0;
uint32_t mec_ring1;
@@ -73,7 +74,7 @@ struct amdgpu_doorbell_index {
};
uint32_t max_assignment;
/* Per engine SDMA doorbell size in dword */
-   uint32_t sdma_doorbell_range;
+   uint32_t dw_range_per_sdma_eng;
 };
 
 typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index cc967dbfd631..64bc41afd71e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -68,7 +68,7 @@ static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev)
 }
 
 static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int 
instance,
-   bool use_doorbell, int doorbell_index, int 
doorbell_size)
+   bool use_doorbell, int index_in_dw, int range_dw_size)
 {
u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_SDMA0_DOORBELL_RANGE) :
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
@@ -76,8 +76,8 @@ static void nbio_v6_1_sdma_doorbell_range(struct 
amdgpu_device *adev, int instan
u32 doorbell_range = RREG32(reg);
 
if (use_doorbell) {
-   doorbell_range = REG_SET_FIELD(doorbell_range, 
BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
-   doorbell_range = REG_SET_FIELD(doorbell_range, 
BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size);
+   doorbell_range = REG_SET_FIELD(doorbell_range, 
BIF_SDMA0_DOORBELL_RANGE, OFFSET, index_in_dw);
+   doorbell_range = REG_SET_FIELD(doorbell_range, 
BIF_SDMA0_DOORBELL_RANGE, SIZE, range_dw_size);
} else
doorbell_range = REG_SET_FIELD(doorbell_range, 
BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 1cdb98ad2db3..28cc96b7a292 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -67,7 +67,7 @@ static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
 }
 
 static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int 
instance,
-   bool use_doorbell, int doorbell_index, int 
doorbell_size)
+   bool use_doorbell, int index_in_dw, int range_dw_size)
 {
u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, 
mmBIF_SDMA0_DOORBELL_RANGE) :
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
@@ -75,8 +75,8 @@ static void nbio_v7_0_sdma_doorbell_range(struct 
amdgpu_device *adev, int instan
u32 doorbell_range = RREG32(reg);
 
if (use_doorbell) {
-   doorbell_range = REG_SET_FIELD(doorbell_range, 
BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
-   doorbell_range = REG_SET_FIELD(doorbell_range, 
BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size);
+   doorbell_range = REG_SET_FIELD(doorbell_range, 
BIF_SDMA0_DOORBELL_RANGE, OFFSET, 

Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6

2019-02-05 Thread Koenig, Christian
Am 05.02.19 um 18:25 schrieb Yang, Philip:
> [SNIP]+
>>> +    if (r == -ERESTARTSYS) {
>>> +    if (!--tries) {
>>> +    DRM_ERROR("Possible deadlock? Retry too many times\n");
>>> +    return -EDEADLK;
>>> +    }
>>> +    goto restart;
>> You really need to restart the IOCTL to potentially handle signals.
>>
>> But since the calling code correctly handles this already you can just
>> drop this change as far as I can see.
>>
> I agree that we should return -ERESTARTSYS to upper layer to handle signals.
>
> But I do not find upper layers handle -ERESTARTSYS in the entire calling
> path, ksys_ioctl -> do_vfs_ioctl -> amdgpu_drm_ioctl -> drm->ioctl ->
> drm_ioctl_kernel -> amdgpu_cs_ioctl. The error code returns to
> application. I confirm it, libdrm userptr test application calling
> amdgpu_cs_ioctl return code is -512, which is -ERESTARTSYS.
>
> So application should handle -ERESTARTSYS to restart the ioctl, but
> libdrm userptr test application doesn't handle this. This causes the
> test failed.

This is a bug in the test cases then.

-ERESTARTSYS can happen at any time during interruptible waiting and it 
is mandatory for the upper layer to handle it correctly.
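
For reference, the retry that the upper layers rely on ultimately lives in
userspace: a robust caller loops on the interrupted ioctl, which is
essentially what libdrm's drmIoctl() does. A minimal sketch using plain
ioctl(2), not the actual test code:

#include <errno.h>
#include <sys/ioctl.h>

/* When a signal really is pending, the kernel turns -ERESTARTSYS into a
 * transparent restart or into EINTR before it reaches userspace, so the
 * caller only has to loop on EINTR/EAGAIN. */
static int submit_ioctl(int fd, unsigned long request, void *arg)
{
	int ret;

	do {
		ret = ioctl(fd, request, arg);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));

	return ret;
}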

>
> Below are the details of the userptr path differences. For the previous
> path, the libdrm test always goes to step 2 and step 3 never triggers, so
> it never returns -ERESTARTSYS, though in theory this could happen.
>
> For the HMM path, the test always goes to step 3, so we have to handle
> this case inside amdgpu_cs_ioctl. Maybe I can change amdgpu_cs_submit to
> return -EBUSY and then restart the ioctl inside amdgpu_cs_ioctl. I will
> submit a new patch.

Clearly a NAK, this won't work correctly.

Christian.

>
> The previous userptr path:
> 1. gem_userptr_ioctl to register userptr
> 2. amdgpu_cs_parser_bos, check if userptr is invalidated, then update
> userptr
> 3. amdgpu_cs_submit, hold p->mn lock, check if userptr is invalidated,
> return -ERESTARTSYS
>
> The new HMM userptr path:
> 1. gem_userptr_ioctl to register userptr
> 2. amdgpu_cs_parser_bos, start HMM to track userptr update
> 3. amdgpu_cs_submit, hold p->mn lock, check HMM if userptr is
> invalidated, return -ERESTARTSYS
>
>
>>> +    }
>>> +
>>>    return r;
>>>    }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d21dd2f369da..555285e329ed 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -329,26 +329,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
>>> *dev, void *data,
>>>    r = amdgpu_bo_reserve(bo, true);
>>>    if (r)
>>> -    goto free_pages;
>>> +    goto user_pages_done;
>>>    amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
>>>    r = ttm_bo_validate(>tbo, >placement, );
>>>    amdgpu_bo_unreserve(bo);
>>>    if (r)
>>> -    goto free_pages;
>>> +    goto user_pages_done;
>>>    }
>>>    r = drm_gem_handle_create(filp, gobj, );
>>> -    /* drop reference from allocate - handle holds it now */
>>> -    drm_gem_object_put_unlocked(gobj);
>>>    if (r)
>>> -    return r;
>>> +    goto user_pages_done;
>>>    args->handle = handle;
>>> -    return 0;
>>> -free_pages:
>>> -    release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
>>> +user_pages_done:
>>> +    if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
>>> +    amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>>>    release_object:
>>>    drm_gem_object_put_unlocked(gobj);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> index e356867d2308..fa2516016c43 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> @@ -222,8 +222,6 @@ static void amdgpu_mn_invalidate_node(struct
>>> amdgpu_mn_node *node,
>>>    true, false, MAX_SCHEDULE_TIMEOUT);
>>>    if (r <= 0)
>>>    DRM_ERROR("(%ld) failed to wait for user bo\n", r);
>>> -
>>> -    amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
>>>    }
>>>    }
>>> @@ -504,3 +502,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>>    mutex_unlock(>mn_lock);
>>>    }
>>> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */
>>> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
>>> +    (1 << 0), /* HMM_PFN_VALID */
>>> +    (1 << 1), /* HMM_PFN_WRITE */
>>> +    0 /* HMM_PFN_DEVICE_PRIVATE */
>>> +};
>>> +
>>> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
>>> +    0xfffeUL, /* HMM_PFN_ERROR */
>>> +    0, /* HMM_PFN_NONE */
>>> +    0xfffcUL /* HMM_PFN_SPECIAL */
>>> +};
>>> +
>>> +void amdgpu_hmm_init_range(struct hmm_range *range)
>>> +{
>>> +    if (range) {
>>> +    range->flags = hmm_range_flags;
>>> +    range->values = hmm_range_values;
>>> +    range->pfn_shift = 

Re: [Intel-gfx] [PATCH 2/6] drm/drv: Prepare to remove drm_dev_unplug()

2019-02-05 Thread Noralf Trønnes


Den 05.02.2019 17.31, skrev Daniel Vetter:
> On Tue, Feb 05, 2019 at 11:20:55AM +0100, Noralf Trønnes wrote:
>>
>>
>> Den 05.02.2019 10.11, skrev Daniel Vetter:
>>> On Mon, Feb 04, 2019 at 06:35:28PM +0100, Noralf Trønnes wrote:


 Den 04.02.2019 16.41, skrev Daniel Vetter:
> On Sun, Feb 03, 2019 at 04:41:56PM +0100, Noralf Trønnes wrote:
>> The only thing now that makes drm_dev_unplug() special is that it sets
>> drm_device->unplugged. Move this code to drm_dev_unregister() so that we
>> can remove drm_dev_unplug().
>>
>> Signed-off-by: Noralf Trønnes 
>> ---

 [...]

>>  drivers/gpu/drm/drm_drv.c | 27 +++
>>  include/drm/drm_drv.h | 10 --
>>  2 files changed, 19 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
>> index 05bbc2b622fc..e0941200edc6 100644
>> --- a/drivers/gpu/drm/drm_drv.c
>> +++ b/drivers/gpu/drm/drm_drv.c
>> @@ -366,15 +366,6 @@ EXPORT_SYMBOL(drm_dev_exit);
>>   */
>>  void drm_dev_unplug(struct drm_device *dev)
>>  {
>> -/*
>> - * After synchronizing any critical read section is guaranteed 
>> to see
>> - * the new value of ->unplugged, and any critical section which 
>> might
>> - * still have seen the old value of ->unplugged is guaranteed 
>> to have
>> - * finished.
>> - */
>> -dev->unplugged = true;
>> -synchronize_srcu(_unplug_srcu);
>> -
>>  drm_dev_unregister(dev);
>>  drm_dev_put(dev);
>>  }
>> @@ -832,11 +823,14 @@ EXPORT_SYMBOL(drm_dev_register);
>>   * drm_dev_register() but does not deallocate the device. The caller 
>> must call
>>   * drm_dev_put() to drop their final reference.
>>   *
>> - * A special form of unregistering for hotpluggable devices is 
>> drm_dev_unplug(),
>> - * which can be called while there are still open users of @dev.
>> + * This function can be called while there are still open users of @dev 
>> as long
>> + * as the driver protects its device resources using drm_dev_enter() and
>> + * drm_dev_exit().
>>   *
>>   * This should be called first in the device teardown code to make sure
>> - * userspace can't access the device instance any more.
>> + * userspace can't access the device instance any more. Drivers that 
>> support
>> + * device unplug will probably want to call 
>> drm_atomic_helper_shutdown() first
>
> Read once more with a bit more coffee, spotted this:
>
> s/first/afterwards/ - shutting down the hw before we've taken it away from
> userspace is kinda the wrong way round. It should be the inverse of driver
> load, which is 1) allocate structures 2) prep hw 3) register driver with
> the world (simplified ofc).
>

 The problem is that drm_dev_unregister() sets the device as unplugged
 and if drm_atomic_helper_shutdown() is called afterwards it's not
 allowed to touch hardware.

 I know it's the wrong order, but the only way to do it in the right
 order is to have a separate function that sets unplugged:

drm_dev_unregister();
drm_atomic_helper_shutdown();
drm_dev_set_unplugged();
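
A minimal sketch of what the split-out helper could look like, assuming it
simply takes over the code this patch removes from drm_dev_unplug() (an
illustration only, not part of the posted series):

/* Mark the device unplugged only after the hardware has been shut down. */
void drm_dev_set_unplugged(struct drm_device *dev)
{
	/*
	 * After synchronizing, any critical read section is guaranteed to
	 * see the new value of ->unplugged, and any critical section that
	 * might still have seen the old value is guaranteed to have
	 * finished.
	 */
	dev->unplugged = true;
	synchronize_srcu(&drm_unplug_srcu);
}

With that helper the teardown order above becomes: unregister first so
userspace loses access, then drm_atomic_helper_shutdown() while hardware
access is still allowed, and finally drm_dev_set_unplugged() to make
drm_dev_enter() fail from then on.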
>>>
>>> Annoying ... but yeah calling _shutdown() before we stopped userspace is
>>> also not going to work. Because userspace could quickly re-enable
>>> something, and then the refcounts would be all wrong again and leaking
>>> objects.
>>>
>>
>> What happens with a USB device that is unplugged with open userspace,
>> will that leak objects?
> 
> Maybe we've jumped to conclusions. drm_atomic_helper_shutdown() will run
> as normal, the only thing that should be skipped is actually touching the
> hw (as long as the driver doesn't protect too much with
> drm_dev_enter/exit). So all the software updates (including refcounting
> updates) will still be done. Ofc current udl is not yet atomic, so in
> reality something else happens.
> 
> And we ofc still have the same issue that if you just unload the driver,
> then the hw will stay on (which might really confuse the driver on next
> load, when it assumes that it only gets loaded from cold boot where
> everything is off - which usually is the case on an arm soc at least).
> 
>>> I get a bit the feeling we're over-optimizing here with trying to devm-ize
>>> drm_dev_register. Just getting drm_device correctly devm-ized is a big
>>> step forward already, and will open up a lot of TODO items across a lot of
>>> drivers. E.g. we could add a drm_dev_kzalloc, for allocating all the drm_*
>>> structs, which gets released together with drm_device. I think that's a
>>> much clearer path forward, I think we all agree that getting the kfree out
>>> of the driver codes is a good thing, and it would allow us to do this
>>> correctly.
>>>
>>> 

Re: kernel BUG at drivers/gpu/drm//ttm/ttm_bo.c:196!

2019-02-05 Thread Michel Dänzer
On 2019-02-05 4:45 p.m., Christian König wrote:
> Possible, but unlikely.
> 
> That sounds more like a reference counting problem where something is
> killing the BO while it is still on the LRU.

FWIW, I've hit this twice now today, whereas I don't remember ever
hitting it before (not 100% sure though).

I reverted the remaining hunk of the "cleanup setting bulk_movable"
change, and it survived a piglit run. Could just be luck, though...


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer


Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6

2019-02-05 Thread Yang, Philip
Hi Christian,

My comments are embedded below. I will submit another patch to address 
those.

Thanks,
Philip

On 2019-02-05 6:52 a.m., Christian König wrote:
> Am 04.02.19 um 19:23 schrieb Yang, Philip:
>> Use HMM helper function hmm_vma_fault() to get physical pages backing
>> userptr and start CPU page table update track of those pages. Then use
>> hmm_vma_range_done() to check if those pages are updated before
>> amdgpu_cs_submit for gfx or before user queues are resumed for kfd.
>>
>> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
>> from scratch, for kfd, restore worker is rescheduled to retry.
>>
>> HMM simplifies the CPU page table concurrent update check, so remove
>> guptasklock, mmu_invalidations, last_set_pages fields from
>> amdgpu_ttm_tt struct.
>>
>> HMM does not pin the page (increase page ref count), so remove related
>> operations like release_pages(), put_page(), mark_page_dirty().
>>
>> Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67
>> Signed-off-by: Philip Yang 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   1 -
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  95 +++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c    | 158 +++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  14 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c    |  25 ++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h    |   4 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 173 --
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |   3 +-
>>   9 files changed, 198 insertions(+), 277 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 0b31a1859023..0e1711a75b68 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -61,7 +61,6 @@ struct kgd_mem {
>>   atomic_t invalid;
>>   struct amdkfd_process_info *process_info;
>> -    struct page **user_pages;
>>   struct amdgpu_sync sync;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index d7b10d79f1de..ae2d838d31ea 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, 
>> struct mm_struct *mm,
>>   goto out;
>>   }
>> -    /* If no restore worker is running concurrently, user_pages
>> - * should not be allocated
>> - */
>> -    WARN(mem->user_pages, "Leaking user_pages array");
>> -
>> -    mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
>> -   sizeof(struct page *),
>> -   GFP_KERNEL | __GFP_ZERO);
>> -    if (!mem->user_pages) {
>> -    pr_err("%s: Failed to allocate pages array\n", __func__);
>> -    ret = -ENOMEM;
>> -    goto unregister_out;
>> -    }
>> -
>> -    ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
>> +    ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
>>   if (ret) {
>>   pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
>> -    goto free_out;
>> +    goto unregister_out;
>>   }
>> -    amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
>> -
>>   ret = amdgpu_bo_reserve(bo, true);
>>   if (ret) {
>>   pr_err("%s: Failed to reserve BO\n", __func__);
>> @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, 
>> struct mm_struct *mm,
>>   amdgpu_bo_unreserve(bo);
>>   release_out:
>> -    if (ret)
>> -    release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
>> -free_out:
>> -    kvfree(mem->user_pages);
>> -    mem->user_pages = NULL;
>> +    amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>>   unregister_out:
>>   if (ret)
>>   amdgpu_mn_unregister(bo);
>> @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
>>   ctx->kfd_bo.priority = 0;
>>   ctx->kfd_bo.tv.bo = >tbo;
>>   ctx->kfd_bo.tv.num_shared = 1;
>> -    ctx->kfd_bo.user_pages = NULL;
>>   list_add(>kfd_bo.tv.head, >list);
>>   amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]);
>> @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem 
>> *mem,
>>   ctx->kfd_bo.priority = 0;
>>   ctx->kfd_bo.tv.bo = >tbo;
>>   ctx->kfd_bo.tv.num_shared = 1;
>> -    ctx->kfd_bo.user_pages = NULL;
>>   list_add(>kfd_bo.tv.head, >list);
>>   i = 0;
>> @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>   list_del(_list_entry->head);
>>   mutex_unlock(_info->lock);
>> -    /* Free user pages if necessary */
>> -    if (mem->user_pages) {
>> -    pr_debug("%s: Freeing user_pages array\n", __func__);
>> -    if (mem->user_pages[0])
>> -    release_pages(mem->user_pages,
>> -  

Re: [Intel-gfx] [PATCH 2/6] drm/drv: Prepare to remove drm_dev_unplug()

2019-02-05 Thread Daniel Vetter
On Tue, Feb 05, 2019 at 11:20:55AM +0100, Noralf Trønnes wrote:
> 
> 
> Den 05.02.2019 10.11, skrev Daniel Vetter:
> > On Mon, Feb 04, 2019 at 06:35:28PM +0100, Noralf Trønnes wrote:
> >>
> >>
> >> Den 04.02.2019 16.41, skrev Daniel Vetter:
> >>> On Sun, Feb 03, 2019 at 04:41:56PM +0100, Noralf Trønnes wrote:
>  The only thing now that makes drm_dev_unplug() special is that it sets
>  drm_device->unplugged. Move this code to drm_dev_unregister() so that we
>  can remove drm_dev_unplug().
> 
>  Signed-off-by: Noralf Trønnes 
>  ---
> >>
> >> [...]
> >>
>   drivers/gpu/drm/drm_drv.c | 27 +++
>   include/drm/drm_drv.h | 10 --
>   2 files changed, 19 insertions(+), 18 deletions(-)
> 
>  diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
>  index 05bbc2b622fc..e0941200edc6 100644
>  --- a/drivers/gpu/drm/drm_drv.c
>  +++ b/drivers/gpu/drm/drm_drv.c
>  @@ -366,15 +366,6 @@ EXPORT_SYMBOL(drm_dev_exit);
>    */
>   void drm_dev_unplug(struct drm_device *dev)
>   {
>  -/*
>  - * After synchronizing any critical read section is guaranteed 
>  to see
>  - * the new value of ->unplugged, and any critical section which 
>  might
>  - * still have seen the old value of ->unplugged is guaranteed 
>  to have
>  - * finished.
>  - */
>  -dev->unplugged = true;
>  -synchronize_srcu(_unplug_srcu);
>  -
>   drm_dev_unregister(dev);
>   drm_dev_put(dev);
>   }
>  @@ -832,11 +823,14 @@ EXPORT_SYMBOL(drm_dev_register);
>    * drm_dev_register() but does not deallocate the device. The caller 
>  must call
>    * drm_dev_put() to drop their final reference.
>    *
>  - * A special form of unregistering for hotpluggable devices is 
>  drm_dev_unplug(),
>  - * which can be called while there are still open users of @dev.
>  + * This function can be called while there are still open users of @dev 
>  as long
>  + * as the driver protects its device resources using drm_dev_enter() and
>  + * drm_dev_exit().
>    *
>    * This should be called first in the device teardown code to make sure
>  - * userspace can't access the device instance any more.
>  + * userspace can't access the device instance any more. Drivers that 
>  support
>  + * device unplug will probably want to call 
>  drm_atomic_helper_shutdown() first
> >>>
> >>> Read once more with a bit more coffee, spotted this:
> >>>
> >>> s/first/afterwards/ - shutting down the hw before we've taken it away from
> >>> userspace is kinda the wrong way round. It should be the inverse of driver
> >>> load, which is 1) allocate structures 2) prep hw 3) register driver with
> >>> the world (simplified ofc).
> >>>
> >>
> >> The problem is that drm_dev_unregister() sets the device as unplugged
> >> and if drm_atomic_helper_shutdown() is called afterwards it's not
> >> allowed to touch hardware.
> >>
> >> I know it's the wrong order, but the only way to do it in the right
> >> order is to have a separate function that sets unplugged:
> >>
> >>drm_dev_unregister();
> >>drm_atomic_helper_shutdown();
> >>drm_dev_set_unplugged();
> > 
> > Annoying ... but yeah calling _shutdown() before we stopped userspace is
> > also not going to work. Because userspace could quickly re-enable
> > something, and then the refcounts would be all wrong again and leaking
> > objects.
> > 
> 
> What happens with a USB device that is unplugged with open userspace,
> will that leak objects?

Maybe we've jumped to conclusions. drm_atomic_helper_shutdown() will run
as normal, the only thing that should be skipped is actually touching the
hw (as long as the driver doesn't protect too much with
drm_dev_enter/exit). So all the software updates (including refcounting
updates) will still be done. Ofc current udl is not yet atomic, so in
reality something else happens.

And we ofc still have the same issue that if you just unload the driver,
then the hw will stay on (which might really confuse the driver on next
load, when it assumes that it only gets loaded from cold boot where
everything is off - which usually is the case on an arm soc at least).
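
To make that concrete, the drm_dev_enter()/drm_dev_exit() pattern referred
to here only guards the hardware access itself; a sketch of a driver
callback (illustrative, not udl code):

static void example_hw_flush(struct drm_device *dev)
{
	int idx;

	/* The atomic helpers still do all the software bookkeeping (state
	 * and refcount updates); only the register/USB access below is
	 * skipped once the device has been marked unplugged. */
	if (!drm_dev_enter(dev, &idx))
		return;

	/* ... touch the hardware here ... */

	drm_dev_exit(idx);
}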

> > I get a bit the feeling we're over-optimizing here with trying to devm-ize
> > drm_dev_register. Just getting drm_device correctly devm-ized is a big
> > step forward already, and will open up a lot of TODO items across a lot of
> > drivers. E.g. we could add a drm_dev_kzalloc, for allocating all the drm_*
> > structs, which gets released together with drm_device. I think that's a
> > much clearer path forward, I think we all agree that getting the kfree out
> > of the driver codes is a good thing, and it would allow us to do this
> > correctly.
> > 
> > Then once we have that and rolled out to a few drivers we can reconsider
> 

Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

2019-02-05 Thread Koenig, Christian
Ah! The initial clear of the PDs is syncing to everything!

Ok, that is easy to fix.

Christian.
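
A rough sketch of the idea (hypothetical; the names mirror the existing
amdgpu sync API, but this is not the actual fix): when clearing a freshly
allocated PD/PT, sync against the PD reservation with the VM as fence owner
instead of AMDGPU_FENCE_OWNER_UNDEFINED, so foreign fences such as the KFD
eviction fence are not waited on:

/* Hypothetical sketch only. OWNER_UNDEFINED makes the clear behave like a
 * buffer move and wait on every fence on the reservation, including KFD
 * eviction fences; restricting the owner avoids the needless preemption. */
static int example_sync_initial_clear(struct amdgpu_device *adev,
				      struct amdgpu_job *job,
				      struct amdgpu_bo *bo)
{
	return amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
				AMDGPU_FENCE_OWNER_VM, false);
}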

Am 05.02.19 um 16:49 schrieb Kuehling, Felix:
> I saw a backtrace from the GPU scheduler when I was debugging this yesterday, 
> but those backtraces never tell us where the command submission came from. It 
> could be memory initialization, or some migration at buffer-validation. 
> Basically, any command submission triggered by page table allocation that 
> synchronizes with the PD reservation object is a problem.
>
> Regards,
>Felix
>
> -Original Message-
> From: Christian König 
> Sent: Tuesday, February 05, 2019 10:40 AM
> To: Kuehling, Felix ; Koenig, Christian 
> ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
>
> Am 05.02.19 um 16:20 schrieb Kuehling, Felix:
 This may cause regressions in KFD if PT BO allocation can trigger
 eviction fences.
>>> I don't think that can happen, but need to double check as well.
>>>
>>> Otherwise allocating page tables could try to evict other page tables from 
>>> the same process and that seriously doesn't make much sense.
>> Eviction fences don't cause actual memory evictions. They are the result of 
>> memory evictions. They cause KFD processes to be preempted in order to allow 
>> the fence to signal so memory can be evicted. The problem is that a page 
>> table allocation waits for the fences on the PD reservation, which looks 
>> like an eviction to KFD and triggers an unnecessary preemption. There is no 
>> actual memory eviction happening here.
> Yeah, but where is that actually coming from?
>
> It sounds like we still unnecessarily synchronize page table updates somewhere.
>
> Do you have a call chain?
>
> Regards,
> Christian.
>
>> Regards,
>> Felix
>>
>> -Original Message-
>> From: Christian König 
>> Sent: Tuesday, February 05, 2019 6:37 AM
>> To: Kuehling, Felix ;
>> amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>>> This may cause regressions in KFD if PT BO allocation can trigger
>>> eviction fences.
>> I don't think that can happen, but need to double check as well.
>>
>> Otherwise allocating page tables could try to evict other page tables from 
>> the same process and that seriously doesn't make much sense.
>>
>>> Do you know whether PT BO allocation would wait on the page-directory
>>> reservation object without the sync-object API anywhere?
>> For inside the kernel I can't think of any relevant use case, except for the 
>> eviction call path and there it is intentional.
>>
>> One thing which will always wait for all fences is the display code path, 
>> but that is not relevant here. (Isn't it?).
>>
>> What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this 
>> is also completely intentional that we wait on everything here.
>>
>> Regards,
>> Christian.
>>
>> Am 04.02.19 um 21:17 schrieb Kuehling, Felix:
>>> This may cause regressions in KFD if PT BO allocation can trigger
>>> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a
>>> context where we had temporarily removed the eviction fence. PT BO
>>> allocation in amdgpu_vm_bo_update is not protected like that.
>>>
>>> I vaguely remember looking into this last time you were working on
>>> our eviction fence code and thinking that most of the cases where we
>>> remove the eviction fence were no longer needed if fence
>>> synchronization happens through the amdgpu_sync-object API (rather
>>> than waiting on a reservation object directly). I think it's time I
>>> go and finally clean that up.
>>>
>>> Do you know whether PT BO allocation would wait on the page-directory
>>> reservation object without the sync-object API anywhere?
>>>
>>> Regards,
>>>   Felix
>>>
>>> On 2019-02-04 7:42 a.m., Christian König wrote:
 Let's start to allocate VM PDs/PTs on demand instead of
 pre-allocating them during mapping.

 Signed-off-by: Christian König 
 ---
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |   9 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  10 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   3 -
  5 files changed, 38 insertions(+), 126 deletions(-)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
 index d7b10d79f1de..2176c92f9377 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
 @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, 
 struct kgd_mem *mem,

 vm->process_info->eviction_fence,
NULL, NULL);
  
 -  ret = 

RE: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

2019-02-05 Thread Kuehling, Felix
I saw a backtrace from the GPU scheduler when I was debugging this yesterday, 
but those backtraces never tell us where the command submission came from. It 
could be memory initialization, or some migration at buffer-validation. 
Basically, any command submission triggered by page table allocation that 
synchronizes with the PD reservation object is a problem.

Regards,
  Felix

-Original Message-
From: Christian König  
Sent: Tuesday, February 05, 2019 10:40 AM
To: Kuehling, Felix ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

Am 05.02.19 um 16:20 schrieb Kuehling, Felix:
>>> This may cause regressions in KFD if PT BO allocation can trigger 
>>> eviction fences.
>> I don't think that can happen, but need to double check as well.
>>
>> Otherwise allocating page tables could try to evict other page tables from 
>> the same process and that seriously doesn't make much sense.
> Eviction fences don't cause actual memory evictions. They are the result of 
> memory evictions. They cause KFD processes to be preempted in order to allow 
> the fence to signal so memory can be evicted. The problem is that a page 
> table allocation waits for the fences on the PD reservation, which looks like 
> an eviction to KFD and triggers an unnecessary preemption. There is no actual 
> memory eviction happening here.

Yeah, but where is that actually coming from?

It sounds like we still unnecessarily synchronize page table updates somewhere.

Do you have a call chain?

Regards,
Christian.

>
> Regards,
>Felix
>
> -Original Message-
> From: Christian König 
> Sent: Tuesday, February 05, 2019 6:37 AM
> To: Kuehling, Felix ; 
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
>
>> This may cause regressions in KFD if PT BO allocation can trigger 
>> eviction fences.
> I don't think that can happen, but need to double check as well.
>
> Otherwise allocating page tables could try to evict other page tables from 
> the same process and that seriously doesn't make much sense.
>
>> Do you know whether PT BO allocation would wait on the page-directory 
>> reservation object without the sync-object API anywhere?
> For inside the kernel I can't think of any relevant use case, except for the 
> eviction call path and there it is intentional.
>
> One thing which will always wait for all fences is the display code path, but 
> that is not relevant here. (Isn't it?).
>
> What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this 
> is also completely intentional that we wait on everything here.
>
> Regards,
> Christian.
>
> Am 04.02.19 um 21:17 schrieb Kuehling, Felix:
>> This may cause regressions in KFD if PT BO allocation can trigger 
>> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a 
>> context where we had temporarily removed the eviction fence. PT BO 
>> allocation in amdgpu_vm_bo_update is not protected like that.
>>
>> I vaguely remember looking into this last time you were working on 
>> our eviction fence code and thinking that most of the cases where we 
>> remove the eviction fence were no longer needed if fence 
>> synchronization happens through the amdgpu_sync-object API (rather 
>> than waiting on a reservation object directly). I think it's time I 
>> go and finally clean that up.
>>
>> Do you know whether PT BO allocation would wait on the page-directory 
>> reservation object without the sync-object API anywhere?
>>
>> Regards,
>>  Felix
>>
>> On 2019-02-04 7:42 a.m., Christian König wrote:
>>> Let's start to allocate VM PDs/PTs on demand instead of 
>>> pre-allocating them during mapping.
>>>
>>> Signed-off-by: Christian König 
>>> ---
>>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |   9 --
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  10 --
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   3 -
>>> 5 files changed, 38 insertions(+), 126 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index d7b10d79f1de..2176c92f9377 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, 
>>> struct kgd_mem *mem,
>>> 
>>> vm->process_info->eviction_fence,
>>> NULL, NULL);
>>> 
>>> -   ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>>> -   if (ret) {
>>> -   pr_err("Failed to allocate pts, err=%d\n", ret);
>>> -   goto err_alloc_pts;
>>> -   }
>>> -
>>> ret = vm_validate_pt_pd_bos(vm);
>>> if (ret) {
>>>

Re: kernel BUG at drivers/gpu/drm//ttm/ttm_bo.c:196!

2019-02-05 Thread Christian König

Possible, but unlikely.

That sounds more like a reference counting problem where something is 
killing the BO while it is still on the LRU.


Need to double check how that can happen,
Christian.
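
For context, ttm_bo.c:196 in this era is the BUG() in ttm_bo_ref_bug(),
which TTM installs as the kref release callback for references it never
expects to be the last one; paraphrased from memory of the TTM code:

/* Used as the release callback in kref_put(&bo->list_kref, ttm_bo_ref_bug)
 * from ttm_bo_del_from_lru(). Reaching it means list_kref already dropped
 * to zero, i.e. the BO was torn down while still sitting on an LRU list,
 * which matches the refcounting suspicion above. */
static void ttm_bo_ref_bug(struct kref *list_kref)
{
	BUG();
}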

Am 05.02.19 um 16:02 schrieb Michel Dänzer:

Hit this with an amdgpu piglit run today, see the attached dmesg
excerpt. It's ttm_bo_ref_bug() being called from ttm_bo_del_from_lru().
Maybe this is still fallout from the "cleanup setting bulk_movable" change?





Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

2019-02-05 Thread Christian König

Am 05.02.19 um 16:20 schrieb Kuehling, Felix:

This may cause regressions in KFD if PT BO allocation can trigger
eviction fences.

I don't think that can happen, but need to double check as well.

Otherwise allocating page tables could try to evict other page tables from the 
same process and that seriously doesn't make much sense.

Eviction fences don't cause actual memory evictions. They are the result of 
memory evictions. They cause KFD processes to be preempted in order to allow 
the fence to signal so memory can be evicted. The problem is that a page table 
allocation waits for the fences on the PD reservation, which looks like an 
eviction to KFD and triggers an unnecessary preemption. There is no actual 
memory eviction happening here.


Yeah, but where is that actually coming from?

It sounds like we still unnecessarily synchronize page table updates 
somewhere.


Do you have a call chain?

Regards,
Christian.



Regards,
   Felix

-Original Message-
From: Christian König 
Sent: Tuesday, February 05, 2019 6:37 AM
To: Kuehling, Felix ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand


This may cause regressions in KFD if PT BO allocation can trigger
eviction fences.

I don't think that can happen, but need to double check as well.

Otherwise allocating page tables could try to evict other page tables from the 
same process and that seriously doesn't make much sense.


Do you know whether PT BO allocation would wait on the page-directory
reservation object without the sync-object API anywhere?

For inside the kernel I can't think of any relevant use case, except for the 
eviction call path and there it is intentional.

One thing which will always wait for all fences is the display code path, but 
that is not relevant here. (Isn't it?).

What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this is 
also completely intentional that we wait on everything here.

Regards,
Christian.

Am 04.02.19 um 21:17 schrieb Kuehling, Felix:

This may cause regressions in KFD if PT BO allocation can trigger
eviction fences. The previous call to amdgpu_vm_alloc_pts was in a
context where we had temporarily removed the eviction fence. PT BO
allocation in amdgpu_vm_bo_update is not protected like that.

I vaguely remember looking into this last time you were working on our
eviction fence code and thinking that most of the cases where we
remove the eviction fence were no longer needed if fence
synchronization happens through the amdgpu_sync-object API (rather
than waiting on a reservation object directly). I think it's time I go
and finally clean that up.

Do you know whether PT BO allocation would wait on the page-directory
reservation object without the sync-object API anywhere?

Regards,
     Felix

On 2019-02-04 7:42 a.m., Christian König wrote:

Let's start to allocate VM PDs/PTs on demand instead of
pre-allocating them during mapping.

Signed-off-by: Christian König 
---
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -
drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |   9 --
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  10 --
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   3 -
5 files changed, 38 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d7b10d79f1de..2176c92f9377 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct 
kgd_mem *mem,
vm->process_info->eviction_fence,
NULL, NULL);

-	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));

-   if (ret) {
-   pr_err("Failed to allocate pts, err=%d\n", ret);
-   goto err_alloc_pts;
-   }
-
ret = vm_validate_pt_pd_bos(vm);
if (ret) {
pr_err("validate_pt_pd_bos() failed\n"); diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 7e22be7ca68a..54dd02a898b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
return -ENOMEM;
}

-	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,

-   size);
-   if (r) {
-   DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
-   amdgpu_vm_bo_rmv(adev, *bo_va);
-   ttm_eu_backoff_reservation(, );
-   return r;
-   }
-
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
 

RE: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

2019-02-05 Thread Kuehling, Felix
> > This may cause regressions in KFD if PT BO allocation can trigger 
> > eviction fences.
> I don't think that can happen, but need to double check as well.
> 
> Otherwise allocating page tables could try to evict other page tables from 
> the same process and that seriously doesn't make much sense.

Eviction fences don't cause actual memory evictions. They are the result of 
memory evictions. They cause KFD processes to be preempted in order to allow 
the fence to signal so memory can be evicted. The problem is that a page table 
allocation waits for the fences on the PD reservation, which looks like an 
eviction to KFD and triggers an unnecessary preemption. There is no actual 
memory eviction happening here.

Regards,
  Felix

-Original Message-
From: Christian König  
Sent: Tuesday, February 05, 2019 6:37 AM
To: Kuehling, Felix ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

> This may cause regressions in KFD if PT BO allocation can trigger 
> eviction fences.
I don't think that can happen, but need to double check as well.

Otherwise allocating page tables could try to evict other page tables from the 
same process and that seriously doesn't make much sense.

> Do you know whether PT BO allocation would wait on the page-directory 
> reservation object without the sync-object API anywhere?
For inside the kernel I can't think of any relevant use case, except for the 
eviction call path and there it is intentional.

One thing which will always wait for all fences is the display code path, but 
that is not relevant here. (Isn't it?).

What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this is 
also completely intentional that we wait on everything here.

Regards,
Christian.

Am 04.02.19 um 21:17 schrieb Kuehling, Felix:
> This may cause regressions in KFD if PT BO allocation can trigger 
> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a 
> context where we had temporarily removed the eviction fence. PT BO 
> allocation in amdgpu_vm_bo_update is not protected like that.
>
> I vaguely remember looking into this last time you were working on our 
> eviction fence code and thinking that most of the cases where we 
> remove the eviction fence were no longer needed if fence 
> synchronization happens through the amdgpu_sync-object API (rather 
> than waiting on a reservation object directly). I think it's time I go 
> and finally clean that up.
>
> Do you know whether PT BO allocation would wait on the page-directory 
> reservation object without the sync-object API anywhere?
>
> Regards,
>     Felix
>
> On 2019-02-04 7:42 a.m., Christian König wrote:
>> Let's start to allocate VM PDs/PTs on demand instead of 
>> pre-allocating them during mapping.
>>
>> Signed-off-by: Christian König 
>> ---
>>.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -
>>drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |   9 --
>>drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  10 --
>>drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +-
>>drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   3 -
>>5 files changed, 38 insertions(+), 126 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index d7b10d79f1de..2176c92f9377 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, 
>> struct kgd_mem *mem,
>>  vm->process_info->eviction_fence,
>>  NULL, NULL);
>>
>> -ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>> -if (ret) {
>> -pr_err("Failed to allocate pts, err=%d\n", ret);
>> -goto err_alloc_pts;
>> -}
>> -
>>  ret = vm_validate_pt_pd_bos(vm);
>>  if (ret) {
>>  pr_err("validate_pt_pd_bos() failed\n"); diff --git 
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> index 7e22be7ca68a..54dd02a898b9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, 
>> struct amdgpu_vm *vm,
>>  return -ENOMEM;
>>  }
>>
>> -r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>> -size);
>> -if (r) {
>> -DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
>> -amdgpu_vm_bo_rmv(adev, *bo_va);
>> -ttm_eu_backoff_reservation(, );
>> -return r;
>> -}
>> -
>>  r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>   AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
>>   AMDGPU_PTE_EXECUTABLE);
>> diff --git 

kernel BUG at drivers/gpu/drm//ttm/ttm_bo.c:196!

2019-02-05 Thread Michel Dänzer

Hit this with an amdgpu piglit run today, see the attached dmesg
excerpt. It's ttm_bo_ref_bug() being called from ttm_bo_del_from_lru().
Maybe this is still fallout from the "cleanup setting bulk_movable" change?


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
Feb  5 15:20:59 kaveri kernel: [ 2006.806877] kernel BUG at drivers/gpu/drm//ttm/ttm_bo.c:196!
Feb  5 15:20:59 kaveri kernel: [ 2006.806909] invalid opcode:  [#1] SMP KASAN NOPTI
Feb  5 15:20:59 kaveri kernel: [ 2006.806922] CPU: 14 PID: 4058 Comm: shader_runner Tainted: G   OE 5.0.0-rc1-00261-gbbf48cae572b #119
Feb  5 15:20:59 kaveri kernel: [ 2006.806929] Hardware name: Micro-Star International Co., Ltd. MS-7A34/B350 TOMAHAWK (MS-7A34), BIOS 1.80 09/13/2017
Feb  5 15:20:59 kaveri kernel: [ 2006.806941] RIP: 0010:ttm_bo_ref_bug.constprop.20+0x5/0x10 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.806945] Code: d3 8b 54 24 04 8b 34 24 e9 74 ff ff ff 48 89 ef e8 60 37 cf d3 eb bd 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 0f 1f 44 00 00 <0f> 0b 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 55 49 89 fd 41
Feb  5 15:20:59 kaveri kernel: [ 2006.806948] RSP: :88819449f670 EFLAGS: 00010202
Feb  5 15:20:59 kaveri kernel: [ 2006.806952] RAX: 0001 RBX: 88810d4e5dd0 RCX: 
Feb  5 15:20:59 kaveri kernel: [ 2006.806954] RDX: dc00 RSI: 0004 RDI: 88819449f608
Feb  5 15:20:59 kaveri kernel: [ 2006.806957] RBP: 88810d4e5e78 R08: ed1032893ec2 R09: ed1032893ec1
Feb  5 15:20:59 kaveri kernel: [ 2006.806959] R10: ed1032893ec1 R11: 0003 R12: 888328562bf0
Feb  5 15:20:59 kaveri kernel: [ 2006.806967] R13: 88810d4e5dfc R14: 88810d4e5e80 R15: 8881427eef78
Feb  5 15:20:59 kaveri kernel: [ 2006.806976] FS:  7f8d74113000() GS:88837e18() knlGS:
Feb  5 15:20:59 kaveri kernel: [ 2006.806987] CS:  0010 DS:  ES:  CR0: 80050033
Feb  5 15:20:59 kaveri kernel: [ 2006.806995] CR2: 7f8d6c160210 CR3: 00019f0f4000 CR4: 003406e0
Feb  5 15:20:59 kaveri kernel: [ 2006.806998] Call Trace:
Feb  5 15:20:59 kaveri kernel: [ 2006.807009]  ttm_bo_del_from_lru+0x2f9/0x430 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807022]  ttm_bo_swapout+0x1fc/0x680 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807035]  ? ttm_bo_unmap_virtual+0x90/0x90 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807046]  ? .slowpath+0xe/0xe
Feb  5 15:20:59 kaveri kernel: [ 2006.807051]  ? native_queued_spin_lock_slowpath+0x5b1/0x8b0
Feb  5 15:20:59 kaveri kernel: [ 2006.807062]  ? find_held_lock+0x33/0x1c0
Feb  5 15:20:59 kaveri kernel: [ 2006.807070]  ? ttm_shrink+0x16c/0x210 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807078]  ? lock_downgrade+0x5d0/0x5d0
Feb  5 15:20:59 kaveri kernel: [ 2006.807088]  ? security_capable+0x54/0x90
Feb  5 15:20:59 kaveri kernel: [ 2006.807100]  ttm_shrink+0x18a/0x210 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807116]  ttm_mem_global_alloc_zone+0x1b7/0x2f0 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807130]  ttm_pool_populate+0x52c/0xae0 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807144]  ? ttm_pool_unpopulate+0x40/0x40 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807147]  ? lock_downgrade+0x5d0/0x5d0
Feb  5 15:20:59 kaveri kernel: [ 2006.807153]  ? mutex_lock_interruptible_nested+0x20/0x20
Feb  5 15:20:59 kaveri kernel: [ 2006.807165]  ttm_populate_and_map_pages+0x2a/0x7d0 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807251]  ? amdgpu_bo_move_notify+0x310/0x310 [amdgpu]
Feb  5 15:20:59 kaveri kernel: [ 2006.807261]  ttm_tt_populate.part.8+0x8b/0x2c0 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807270]  ? ttm_mem_io_reserve_vm+0xbe/0x2f0 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807276]  ? mutex_trylock+0x167/0x1a0
Feb  5 15:20:59 kaveri kernel: [ 2006.807286]  ttm_bo_vm_fault+0x777/0x1290 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807300]  ? ttm_bo_vm_access+0x680/0x680 [ttm]
Feb  5 15:20:59 kaveri kernel: [ 2006.807372]  ? amdgpu_drm_ioctl+0xf7/0x1b0 [amdgpu]
Feb  5 15:20:59 kaveri kernel: [ 2006.807384]  ? mark_held_locks+0x140/0x140
Feb  5 15:20:59 kaveri kernel: [ 2006.807394]  __do_fault+0x80/0x320
Feb  5 15:20:59 kaveri kernel: [ 2006.807402]  __handle_mm_fault+0x2011/0x39f0
Feb  5 15:20:59 kaveri kernel: [ 2006.807409]  ? trace_hardirqs_on_thunk+0x1a/0x1c
Feb  5 15:20:59 kaveri kernel: [ 2006.807415]  ? vmf_insert_mixed_mkwrite+0x10/0x10
Feb  5 15:20:59 kaveri kernel: [ 2006.807422]  ? find_held_lock+0x33/0x1c0
Feb  5 15:20:59 kaveri kernel: [ 2006.807432]  ? mark_held_locks+0xc1/0x140
Feb  5 15:20:59 kaveri kernel: [ 2006.807438]  ? handle_mm_fault+0x4e7/0x750
Feb  5 15:20:59 kaveri kernel: [ 2006.807446]  handle_mm_fault+0x23f/0x750
Feb  5 15:20:59 kaveri kernel: [ 2006.807455]  __do_page_fault+0x402/0x9e0
Feb  5 15:20:59 kaveri kernel: [ 2006.807463]  ? page_fault+0x8/0x30
Feb  5 15:20:59 kaveri 

Re: [PATCH] drm/amdgpu/display: fix compiler errors [-Werror,-Wparentheses-equality]

2019-02-05 Thread Wentland, Harry
On 2019-02-04 3:16 a.m., Vishwakarma, Pratik wrote:
> Remove extraneous parentheses around the comparison
> to silence this warning
> 
> Signed-off-by: Pratik Vishwakarma 

Reviewed-by: Harry Wentland 

Harry

> ---
>  drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c 
> b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c
> index 7d102ac0d61b..1ef0074302c5 100644
> --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c
> +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c
> @@ -63,7 +63,7 @@ void scaler_settings_calculation(struct 
> dcn_bw_internal_vars *v)
>   if (v->interlace_output[k] == 1.0) {
>   v->v_ratio[k] = 2.0 * v->v_ratio[k];
>   }
> - if ((v->underscan_output[k] == 1.0)) {
> + if (v->underscan_output[k] == 1.0) {
>   v->h_ratio[k] = v->h_ratio[k] * v->under_scan_factor;
>   v->v_ratio[k] = v->v_ratio[k] * v->under_scan_factor;
>   }
> 
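
For background, clang's -Wparentheses-equality fires because a redundant
pair of parentheses around an equality is a common sign of a typo for an
assignment; a tiny illustration (not from the driver):

	int x = 2;

	if ((x == 1))	/* warning: extraneous parentheses around comparison */
		x = 0;

	if (x == 1)	/* no warning: plain comparison */
		x = 0;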


Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6

2019-02-05 Thread Christian König

Am 04.02.19 um 19:23 schrieb Yang, Philip:

Use HMM helper function hmm_vma_fault() to get physical pages backing
userptr and start CPU page table update track of those pages. Then use
hmm_vma_range_done() to check if those pages are updated before
amdgpu_cs_submit for gfx or before user queues are resumed for kfd.

If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
from scratch, for kfd, restore worker is rescheduled to retry.

HMM simplifies the CPU page table concurrent update check, so remove
guptasklock, mmu_invalidations, last_set_pages fields from
amdgpu_ttm_tt struct.

HMM does not pin the page (increase page ref count), so remove related
operations like release_pages(), put_page(), mark_page_dirty().

Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67
Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   1 -
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  95 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 158 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  14 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c|  25 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 173 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |   3 +-
  9 files changed, 198 insertions(+), 277 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 0b31a1859023..0e1711a75b68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -61,7 +61,6 @@ struct kgd_mem {
  
  	atomic_t invalid;

struct amdkfd_process_info *process_info;
-   struct page **user_pages;
  
  	struct amdgpu_sync sync;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index d7b10d79f1de..ae2d838d31ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct 
mm_struct *mm,
goto out;
}
  
-	/* If no restore worker is running concurrently, user_pages

-* should not be allocated
-*/
-   WARN(mem->user_pages, "Leaking user_pages array");
-
-   mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
-  sizeof(struct page *),
-  GFP_KERNEL | __GFP_ZERO);
-   if (!mem->user_pages) {
-   pr_err("%s: Failed to allocate pages array\n", __func__);
-   ret = -ENOMEM;
-   goto unregister_out;
-   }
-
-   ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+   ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
-   goto free_out;
+   goto unregister_out;
}
  
-	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);

-
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__);
@@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct 
mm_struct *mm,
amdgpu_bo_unreserve(bo);
  
  release_out:

-   if (ret)
-   release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-free_out:
-   kvfree(mem->user_pages);
-   mem->user_pages = NULL;
+   amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
  unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
@@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = >tbo;
ctx->kfd_bo.tv.num_shared = 1;
-   ctx->kfd_bo.user_pages = NULL;
list_add(>kfd_bo.tv.head, >list);
  
  	amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]);

@@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = >tbo;
ctx->kfd_bo.tv.num_shared = 1;
-   ctx->kfd_bo.user_pages = NULL;
list_add(>kfd_bo.tv.head, >list);
  
  	i = 0;

@@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(_list_entry->head);
mutex_unlock(_info->lock);
  
-	/* Free user pages if necessary */

-   if (mem->user_pages) {
-   pr_debug("%s: Freeing user_pages array\n", __func__);
-   if (mem->user_pages[0])
-   release_pages(mem->user_pages,
-   mem->bo->tbo.ttm->num_pages);
-   kvfree(mem->user_pages);
-   }
-
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, );
if (unlikely(ret))
return ret;
@@ -1855,25 +1824,11 @@ static int 

Re: [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2

2019-02-05 Thread Christian König

Am 04.02.19 um 19:23 schrieb Yang, Philip:

There is a circular lock dependency between the gfx and kfd paths with the HMM change:
lock(dqm) -> bo::reserve -> amdgpu_mn_lock

To avoid this, move init/uninit_mqd() out of lock(dqm) to remove nested
locking between mmap_sem and bo::reserve. The locking order
is: bo::reserve -> amdgpu_mn_lock(p->mn)


In general this sounds correct to me, but apart from that I don't know 
the code well enough to fully judge.
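
For readers less familiar with the hazard the commit message describes, it
is the classic AB-BA inversion; a minimal sketch with placeholder locks
(not the actual kfd/amdgpu locks):

static DEFINE_MUTEX(lock_a);	/* stands in for the dqm lock     */
static DEFINE_MUTEX(lock_b);	/* stands in for amdgpu_mn_lock   */

static void path_one(void)	/* old create_queue_cpsch() shape */
{
	mutex_lock(&lock_a);	/* dqm first ...                  */
	mutex_lock(&lock_b);	/* ... then mn lock: order A -> B */
	mutex_unlock(&lock_b);
	mutex_unlock(&lock_a);
}

static void path_two(void)	/* e.g. the MMU notifier side     */
{
	mutex_lock(&lock_b);	/* mn lock first ...              */
	mutex_lock(&lock_a);	/* ... then dqm: order B -> A     */
	mutex_unlock(&lock_a);
	mutex_unlock(&lock_b);
}

If both paths run concurrently, each can hold its first lock while waiting
forever on the other's; moving init_mqd()/uninit_mqd() (which reach
bo::reserve and eventually amdgpu_mn_lock) outside dqm_lock() removes the
A -> B ordering entirely.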




Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153
Signed-off-by: Philip Yang 


Acked-by: Christian König 


---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++-
  1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..efe0d3c0215b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1156,21 +1156,17 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q,
int retval;
struct mqd_manager *mqd_mgr;
  
-	retval = 0;

-
-   dqm_lock(dqm);
-
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
pr_warn("Can't create new usermode queue because %d queues were 
already created\n",
dqm->total_queue_count);
retval = -EPERM;
-   goto out_unlock;
+   goto out;
}
  
  	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {

retval = allocate_sdma_queue(dqm, >sdma_id);
if (retval)
-   goto out_unlock;
+   goto out;
q->properties.sdma_queue_id =
q->sdma_id / get_num_sdma_engines(dqm);
q->properties.sdma_engine_id =
@@ -1181,6 +1177,9 @@ static int create_queue_cpsch(struct device_queue_manager 
*dqm, struct queue *q,
if (retval)
goto out_deallocate_sdma_queue;
  
+	/* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:

+* lock(dqm) -> bo::reserve
+*/
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
  
@@ -1188,6 +1187,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,

retval = -ENOMEM;
goto out_deallocate_doorbell;
}
+
/*
 * Eviction state logic: we only mark active queues as evicted
 * to avoid the overhead of restoring inactive queues later
@@ -1196,9 +1196,7 @@ static int create_queue_cpsch(struct device_queue_manager 
*dqm, struct queue *q,
q->properties.is_evicted = (q->properties.queue_size > 0 &&
q->properties.queue_percent > 0 &&
q->properties.queue_address != 0);
-
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
q->properties.tba_addr = qpd->tba_addr;
q->properties.tma_addr = qpd->tma_addr;
retval = mqd_mgr->init_mqd(mqd_mgr, >mqd, >mqd_mem_obj,
@@ -1206,6 +1204,8 @@ static int create_queue_cpsch(struct device_queue_manager 
*dqm, struct queue *q,
if (retval)
goto out_deallocate_doorbell;
  
+	dqm_lock(dqm);

+
list_add(>list, >queues_list);
qpd->queue_count++;
if (q->properties.is_active) {
@@ -1233,9 +1233,7 @@ static int create_queue_cpsch(struct device_queue_manager 
*dqm, struct queue *q,
  out_deallocate_sdma_queue:
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q->sdma_id);
-out_unlock:
-   dqm_unlock(dqm);
-
+out:
return retval;
  }
  
@@ -1398,8 +1396,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,

qpd->reset_wavefronts = true;
}
  
-	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

-
/*
 * Unconditionally decrement this counter, regardless of the queue's
 * type
@@ -1410,6 +1406,9 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
  
  	dqm_unlock(dqm);
  
+	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */

+   mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
return retval;
  
  failed:

@@ -1631,7 +1630,11 @@ static int process_termination_cpsch(struct 
device_queue_manager *dqm,
qpd->reset_wavefronts = false;
}
  
-	/* lastly, free mqd resources */

+   dqm_unlock(dqm);
+
+   /* Lastly, free mqd resources.
+* Do uninit_mqd() after dqm_unlock to avoid circular locking.
+*/
list_for_each_entry_safe(q, next, >queues_list, list) {
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
@@ -1645,7 +1648,6 @@ static int 

Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

2019-02-05 Thread Christian König

Am 05.02.19 um 01:33 schrieb Kuehling, Felix:

On 2019-02-04 3:17 p.m., Kuehling, Felix wrote:

This may cause regressions in KFD if PT BO allocation can trigger
eviction fences. The previous call to amdgpu_vm_alloc_pts was in a
context where we had temporarily removed the eviction fence. PT BO
allocation in amdgpu_vm_bo_update is not protected like that.

I vaguely remember looking into this last time you were working on our
eviction fence code and thinking that most of the cases where we remove
the eviction fence were no longer needed if fence synchronization
happens through the amdgpu_sync-object API (rather than waiting on a
reservation object directly). I think it's time I go and finally clean
that up.

I'm looking at this now. It's not working as I was hoping.



Do you know whether PT BO allocation would wait on the page-directory
reservation object without the sync-object API anywhere?

It doesn't even matter. Buffer moves (anything calling amdgpu_sync_resv
with AMDGPU_FENCE_OWNER_UNDEFINED) are supposed to trigger eviction
fences. So page table BO allocation triggers eviction fences on the PD
reservation. I don't know how to avoid this other than by removing the
eviction fence while allocating PT BOs. With your changes this will be
potentially every time we map or unmap memory.

Any better ideas?


Let me take a closer look what exactly is happening here.

Regards,
Christian.



Regards,
    Felix



Regards,
     Felix

On 2019-02-04 7:42 a.m., Christian König wrote:

Let's start to allocate VM PDs/PTs on demand instead of pre-allocating
them during mapping.

Signed-off-by: Christian König 
---
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -
drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |   9 --
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  10 --
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   3 -
5 files changed, 38 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d7b10d79f1de..2176c92f9377 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct 
kgd_mem *mem,
vm->process_info->eviction_fence,
NULL, NULL);

-	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));

-   if (ret) {
-   pr_err("Failed to allocate pts, err=%d\n", ret);
-   goto err_alloc_pts;
-   }
-
ret = vm_validate_pt_pd_bos(vm);
if (ret) {
pr_err("validate_pt_pd_bos() failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 7e22be7ca68a..54dd02a898b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
return -ENOMEM;
}

-	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,

-   size);
-   if (r) {
-   DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
-   amdgpu_vm_bo_rmv(adev, *bo_va);
-   ttm_eu_backoff_reservation(, );
-   return r;
-   }
-
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
 AMDGPU_PTE_EXECUTABLE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d21dd2f369da..e141e3b13112 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

	switch (args->operation) {

case AMDGPU_VA_OP_MAP:
-   r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
-   args->map_size);
-   if (r)
-   goto error_backoff;
-
va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
 args->offset_in_bo, args->map_size,
@@ -647,11 +642,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->map_size);
break;
case AMDGPU_VA_OP_REPLACE:
-   r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
-   args->map_size);
-   if (r)
-   goto error_backoff;
-
va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
r = 

Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

2019-02-05 Thread Christian König

This may cause regressions in KFD if PT BO allocation can trigger
eviction fences.

I don't think that can happen, but I need to double-check as well.

Otherwise allocating page tables could try to evict other page tables 
from the same process and that seriously doesn't make much sense.



Do you know whether PT BO allocation would wait on the page-directory
reservation object without the sync-object API anywhere?
Inside the kernel I can't think of any relevant use case, except for
the eviction call path, and there it is intentional.


One thing which will always wait for all fences is the display code 
path, but that is not relevant here. (Isn't it?).


What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think 
it is also completely intentional that we wait on everything here.


Regards,
Christian.

Am 04.02.19 um 21:17 schrieb Kuehling, Felix:

This may cause regressions in KFD if PT BO allocation can trigger
eviction fences. The previous call to amdgpu_vm_alloc_pts was in a
context where we had temporarily removed the eviction fence. PT BO
allocation in amdgpu_vm_bo_update is not protected like that.

I vaguely remember looking into this last time you were working on our
eviction fence code and thinking that most of the cases where we remove
the eviction fence were no longer needed if fence synchronization
happens through the amdgpu_sync-object API (rather than waiting on a
reservation object directly). I think it's time I go and finally clean
that up.

Do you know whether PT BO allocation would wait on the page-directory
reservation object without the sync-object API anywhere?

Regards,
    Felix

On 2019-02-04 7:42 a.m., Christian König wrote:

Let's start to allocate VM PDs/PTs on demand instead of pre-allocating
them during mapping.

Signed-off-by: Christian König 
---
   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -
   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c   |   9 --
   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  10 --
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 136 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   3 -
   5 files changed, 38 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d7b10d79f1de..2176c92f9377 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct 
kgd_mem *mem,
vm->process_info->eviction_fence,
NULL, NULL);
   
-	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));

-   if (ret) {
-   pr_err("Failed to allocate pts, err=%d\n", ret);
-   goto err_alloc_pts;
-   }
-
ret = vm_validate_pt_pd_bos(vm);
if (ret) {
pr_err("validate_pt_pd_bos() failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 7e22be7ca68a..54dd02a898b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
return -ENOMEM;
}
   
-	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,

-   size);
-   if (r) {
-   DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
-   amdgpu_vm_bo_rmv(adev, *bo_va);
-   ttm_eu_backoff_reservation(&ticket, &list);
-   return r;
-   }
-
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
 AMDGPU_PTE_EXECUTABLE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d21dd2f369da..e141e3b13112 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
   
   	switch (args->operation) {

case AMDGPU_VA_OP_MAP:
-   r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
-   args->map_size);
-   if (r)
-   goto error_backoff;
-
va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
 args->offset_in_bo, args->map_size,
@@ -647,11 +642,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->map_size);
break;
case AMDGPU_VA_OP_REPLACE:
-   r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
- 

Re: [PATCH] drm/amdgu/vce_v3: start vce block before ring test

2019-02-05 Thread Koenig, Christian
Am 05.02.19 um 10:21 schrieb S, Shirish:
> On 2/4/2019 9:00 PM, Liu, Leo wrote:
>> On 2/4/19 7:49 AM, Koenig, Christian wrote:
>>> Am 04.02.19 um 13:44 schrieb S, Shirish:
 vce ring test fails during resume since mmVCE_RB_RPTR*
 is not initialized/updated.

 Hence start vce block before ring test.
>>> Mhm, I wonder why this ever worked. But yeah, same problem seems to
>>> exist for VCE 2 as well.
>>>
>>> Leo any comment on this?
>> The UVD and VCE start functions were in hw_init originally from the bring-up
>> on all the HW. And later the DPM developer moved them to
>> set_powergating_state() for some reason.
>>
>> @Shirish, are you sure the vce_v3_0_start() is not there?
>>
>> Simply adding it back to hw_init might break the DPM logic, so
>> please make sure.
> Sure Leo, I will check and get back.

I've just double-checked the code, and at least off hand this patch looks
incorrect to me.

Submitting commands to the ring should automatically call
amdgpu_vce_ring_begin_use(), which ungates the power and clocks and starts
the engine.
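A rough sketch of the mechanism being referred to here (simplified and
hedged; the ring-funcs hookup exists in the driver, but the exact call
sites for the begin_use/end_use callbacks are my assumption, not something
stated in this thread):

	/* VCE rings register power-management hooks in their ring funcs. */
	static const struct amdgpu_ring_funcs vce_ring_funcs_sketch = {
		/* ... other callbacks omitted ... */
		.begin_use = amdgpu_vce_ring_begin_use, /* ungate power/clocks, start engine */
		.end_use = amdgpu_vce_ring_end_use,     /* schedule the idle handler */
	};

	static int vce_ring_test_sketch(struct amdgpu_ring *ring)
	{
		/* Allocating ring space is assumed to invoke ->begin_use(),
		 * so the block is powered up before the test commands run. */
		int r = amdgpu_ring_alloc(ring, 16);

		if (r)
			return r;
		/* ... write the test packet ... */
		amdgpu_ring_commit(ring);
		return 0;
	}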

This is actually vital for the ring test, so this patch is a clear NAK.

Christian.

>
> Regards,
>
> Shirish S
>
>> Thanks,
>>
>> Leo
>>
>>
>>> Thanks,
>>> Christian.
>>>
 Signed-off-by: Shirish S 
 ---
 * vce_v4_0.c's hw_init sequence already has this change.

  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 4 
  1 file changed, 4 insertions(+)

 diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c 
 b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
 index 6ec65cf1..d809c10 100644
 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
 +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
 @@ -469,6 +469,10 @@ static int vce_v3_0_hw_init(void *handle)
int r, i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  
 +  r = vce_v3_0_start(adev);
 +  if (r)
 +  return r;
 +
vce_v3_0_override_vce_clock_gating(adev, true);
  
amdgpu_asic_set_vce_clocks(adev, 1, 1);



Re: [Intel-gfx] [PATCH 2/6] drm/drv: Prepare to remove drm_dev_unplug()

2019-02-05 Thread Noralf Trønnes


Den 05.02.2019 10.11, skrev Daniel Vetter:
> On Mon, Feb 04, 2019 at 06:35:28PM +0100, Noralf Trønnes wrote:
>>
>>
>> Den 04.02.2019 16.41, skrev Daniel Vetter:
>>> On Sun, Feb 03, 2019 at 04:41:56PM +0100, Noralf Trønnes wrote:
 The only thing now that makes drm_dev_unplug() special is that it sets
 drm_device->unplugged. Move this code to drm_dev_unregister() so that we
 can remove drm_dev_unplug().

 Signed-off-by: Noralf Trønnes 
 ---
>>
>> [...]
>>
  drivers/gpu/drm/drm_drv.c | 27 +++
  include/drm/drm_drv.h | 10 --
  2 files changed, 19 insertions(+), 18 deletions(-)

 diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
 index 05bbc2b622fc..e0941200edc6 100644
 --- a/drivers/gpu/drm/drm_drv.c
 +++ b/drivers/gpu/drm/drm_drv.c
 @@ -366,15 +366,6 @@ EXPORT_SYMBOL(drm_dev_exit);
   */
  void drm_dev_unplug(struct drm_device *dev)
  {
 -  /*
 -   * After synchronizing any critical read section is guaranteed to see
 -   * the new value of ->unplugged, and any critical section which might
 -   * still have seen the old value of ->unplugged is guaranteed to have
 -   * finished.
 -   */
 -  dev->unplugged = true;
 -  synchronize_srcu(&drm_unplug_srcu);
 -
drm_dev_unregister(dev);
drm_dev_put(dev);
  }
 @@ -832,11 +823,14 @@ EXPORT_SYMBOL(drm_dev_register);
   * drm_dev_register() but does not deallocate the device. The caller must 
 call
   * drm_dev_put() to drop their final reference.
   *
 - * A special form of unregistering for hotpluggable devices is 
 drm_dev_unplug(),
 - * which can be called while there are still open users of @dev.
 + * This function can be called while there are still open users of @dev 
 as long
 + * as the driver protects its device resources using drm_dev_enter() and
 + * drm_dev_exit().
   *
   * This should be called first in the device teardown code to make sure
 - * userspace can't access the device instance any more.
 + * userspace can't access the device instance any more. Drivers that 
 support
 + * device unplug will probably want to call drm_atomic_helper_shutdown() 
 first
>>>
>>> Read once more with a bit more coffee, spotted this:
>>>
>>> s/first/afterwards/ - shutting down the hw before we've taken it away from
>>> userspace is kinda the wrong way round. It should be the inverse of driver
>>> load, which is 1) allocate structures 2) prep hw 3) register driver with
>>> the world (simplified ofc).
>>>
>>
>> The problem is that drm_dev_unregister() sets the device as unplugged
>> and if drm_atomic_helper_shutdown() is called afterwards it's not
>> allowed to touch hardware.
>>
>> I know it's the wrong order, but the only way to do it in the right
>> order is to have a separate function that sets unplugged:
>>
>>  drm_dev_unregister();
>>  drm_atomic_helper_shutdown();
>>  drm_dev_set_unplugged();
> 
> Annoying ... but yeah, calling _shutdown() before we have stopped userspace
> is also not going to work, because userspace could quickly re-enable
> something, and then the refcounts would be all wrong again, leaking
> objects.
> 

What happens with a USB device that is unplugged while userspace still has
it open? Will that leak objects?

> I get a bit of a feeling we're over-optimizing here with trying to devm-ize
> drm_dev_register. Just getting drm_device correctly devm-ized is a big
> step forward already, and will open up a lot of TODO items across a lot of
> drivers. E.g. we could add a drm_dev_kzalloc for allocating all the drm_*
> structs, which gets released together with drm_device. I think that's a
> much clearer path forward. I think we all agree that getting the kfree out
> of the driver code is a good thing, and it would allow us to do this
> correctly.
> 
> Then once we have that and it has been rolled out to a few drivers, we can
> reconsider the entire unregister/shutdown Gordian knot here. Atm I just
> have no idea how to do this properly :-/
> 
> Thoughts, other ideas?
> 

Yeah, I've come to the conclusion that devm_drm_dev_register() doesn't
make much sense if we need a driver remove callback anyway.

I think devm_drm_dev_init() makes sense because it yields a cleaner
probe() function. An additional benefit is that it requires a
drm_driver->release function, which is a step in the right direction to
get the drm_device lifetime right.

Do we agree that a drm_dev_set_unplugged() function is necessary to get
the remove/disconnect order right?
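For illustration, such a helper would essentially just carry the code this
patch moves into drm_dev_unregister(); a minimal sketch, not something
posted verbatim in this thread:

	void drm_dev_set_unplugged(struct drm_device *dev)
	{
		/*
		 * After synchronizing, any critical read section is guaranteed
		 * to see the new value of ->unplugged, and any critical section
		 * which might still have seen the old value is guaranteed to
		 * have finished.
		 */
		dev->unplugged = true;
		synchronize_srcu(&drm_unplug_srcu);
	}

With that, a hotplug-capable driver could call drm_dev_unregister(), then
drm_atomic_helper_shutdown(), and finally drm_dev_set_unplugged(), matching
the ordering discussed above.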

What about drm_dev_unplug()? Maybe I should just leave it be?

- amd uses drm_driver->unload, so that one takes some work to get right
  to support unplug. It doesn't check the unplugged state, so really
  doesn't need drm_dev_unplug() I guess. Do they have cards that can be
  hotplugged?
- udl uses drm_driver->unload, doesn't use drm_atomic_helper_shutdown().
  It has only one 

Re: [PATCH] drm/amdgu/vce_v3: start vce block before ring test

2019-02-05 Thread S, Shirish

On 2/4/2019 9:00 PM, Liu, Leo wrote:
> On 2/4/19 7:49 AM, Koenig, Christian wrote:
>> Am 04.02.19 um 13:44 schrieb S, Shirish:
>>> vce ring test fails during resume since mmVCE_RB_RPTR*
>>> is not initialized/updated.
>>>
>>> Hence start vce block before ring test.
>> Mhm, I wonder why this ever worked. But yeah, same problem seems to
>> exist for VCE 2 as well.
>>
>> Leo any comment on this?
> The UVD and VCE start functions were in hw_init originally from the bring-up
> on all the HW. And later the DPM developer moved them to
> set_powergating_state() for some reason.
>
> @Shirish, are you sure the vce_v3_0_start() is not there?
>
> Simply adding it back to hw_init might break the DPM logic, so
> please make sure.

Sure Leo, I will check and get back.

Regards,

Shirish S

>
> Thanks,
>
> Leo
>
>
>> Thanks,
>> Christian.
>>
>>> Signed-off-by: Shirish S 
>>> ---
>>> * vce_v4_0.c's hw_init sequence already has this change.
>>>
>>> drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 4 
>>> 1 file changed, 4 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c 
>>> b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
>>> index 6ec65cf1..d809c10 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
>>> @@ -469,6 +469,10 @@ static int vce_v3_0_hw_init(void *handle)
>>> int r, i;
>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>> 
>>> +   r = vce_v3_0_start(adev);
>>> +   if (r)
>>> +   return r;
>>> +
>>> vce_v3_0_override_vce_clock_gating(adev, true);
>>> 
>>> amdgpu_asic_set_vce_clocks(adev, 1, 1);

-- 
Regards,
Shirish S



Re: [Intel-gfx] [PATCH 2/6] drm/drv: Prepare to remove drm_dev_unplug()

2019-02-05 Thread Daniel Vetter
On Mon, Feb 04, 2019 at 06:35:28PM +0100, Noralf Trønnes wrote:
> 
> 
> Den 04.02.2019 16.41, skrev Daniel Vetter:
> > On Sun, Feb 03, 2019 at 04:41:56PM +0100, Noralf Trønnes wrote:
> >> The only thing now that makes drm_dev_unplug() special is that it sets
> >> drm_device->unplugged. Move this code to drm_dev_unregister() so that we
> >> can remove drm_dev_unplug().
> >>
> >> Signed-off-by: Noralf Trønnes 
> >> ---
> 
> [...]
> 
> >>  drivers/gpu/drm/drm_drv.c | 27 +++
> >>  include/drm/drm_drv.h | 10 --
> >>  2 files changed, 19 insertions(+), 18 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> >> index 05bbc2b622fc..e0941200edc6 100644
> >> --- a/drivers/gpu/drm/drm_drv.c
> >> +++ b/drivers/gpu/drm/drm_drv.c
> >> @@ -366,15 +366,6 @@ EXPORT_SYMBOL(drm_dev_exit);
> >>   */
> >>  void drm_dev_unplug(struct drm_device *dev)
> >>  {
> >> -  /*
> >> -   * After synchronizing any critical read section is guaranteed to see
> >> -   * the new value of ->unplugged, and any critical section which might
> >> -   * still have seen the old value of ->unplugged is guaranteed to have
> >> -   * finished.
> >> -   */
> >> -  dev->unplugged = true;
> >> -  synchronize_srcu(&drm_unplug_srcu);
> >> -
> >>drm_dev_unregister(dev);
> >>drm_dev_put(dev);
> >>  }
> >> @@ -832,11 +823,14 @@ EXPORT_SYMBOL(drm_dev_register);
> >>   * drm_dev_register() but does not deallocate the device. The caller must 
> >> call
> >>   * drm_dev_put() to drop their final reference.
> >>   *
> >> - * A special form of unregistering for hotpluggable devices is 
> >> drm_dev_unplug(),
> >> - * which can be called while there are still open users of @dev.
> >> + * This function can be called while there are still open users of @dev 
> >> as long
> >> + * as the driver protects its device resources using drm_dev_enter() and
> >> + * drm_dev_exit().
> >>   *
> >>   * This should be called first in the device teardown code to make sure
> >> - * userspace can't access the device instance any more.
> >> + * userspace can't access the device instance any more. Drivers that 
> >> support
> >> + * device unplug will probably want to call drm_atomic_helper_shutdown() 
> >> first
> > 
> > Read once more with a bit more coffee, spotted this:
> > 
> > s/first/afterwards/ - shutting down the hw before we've taken it away from
> > userspace is kinda the wrong way round. It should be the inverse of driver
> > load, which is 1) allocate structures 2) prep hw 3) register driver with
> > the world (simplified ofc).
> > 
> 
> The problem is that drm_dev_unregister() sets the device as unplugged
> and if drm_atomic_helper_shutdown() is called afterwards it's not
> allowed to touch hardware.
> 
> I know it's the wrong order, but the only way to do it in the right
> order is to have a separate function that sets unplugged:
> 
>   drm_dev_unregister();
>   drm_atomic_helper_shutdown();
>   drm_dev_set_unplugged();

Annoying ... but yeah, calling _shutdown() before we have stopped userspace
is also not going to work, because userspace could quickly re-enable
something, and then the refcounts would be all wrong again, leaking
objects.

I get a bit of a feeling we're over-optimizing here with trying to devm-ize
drm_dev_register. Just getting drm_device correctly devm-ized is a big
step forward already, and will open up a lot of TODO items across a lot of
drivers. E.g. we could add a drm_dev_kzalloc for allocating all the drm_*
structs, which gets released together with drm_device. I think that's a
much clearer path forward. I think we all agree that getting the kfree out
of the driver code is a good thing, and it would allow us to do this
correctly.
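A rough sketch of what such a helper could look like (purely illustrative;
the managed list on drm_device and every name below are hypothetical, this
is just the general direction the later drmm_* allocation helpers took):

	struct drm_managed_node {
		struct list_head entry;
		/* allocation payload follows the node */
	};

	void *drm_dev_kzalloc(struct drm_device *dev, size_t size)
	{
		struct drm_managed_node *node;

		node = kzalloc(sizeof(*node) + size, GFP_KERNEL);
		if (!node)
			return NULL;

		/* dev->managed is a hypothetical list that the final
		 * drm_dev_put()/release path would walk and kfree;
		 * locking omitted for brevity */
		list_add(&node->entry, &dev->managed);
		return node + 1;
	}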

Then once we have that and it has been rolled out to a few drivers, we can
reconsider the entire unregister/shutdown Gordian knot here. Atm I just
have no idea how to do this properly :-/

Thoughts, other ideas?

Cheers, Daniel

> Noralf.
> 
> >> + * in order to disable the hardware on regular driver module unload.
> >>   */
> >>  void drm_dev_unregister(struct drm_device *dev)
> >>  {
> >> @@ -845,6 +839,15 @@ void drm_dev_unregister(struct drm_device *dev)
> >>if (drm_core_check_feature(dev, DRIVER_LEGACY))
> >>drm_lastclose(dev);
> >>  
> >> +  /*
> >> +   * After synchronizing any critical read section is guaranteed to see
> >> +   * the new value of ->unplugged, and any critical section which might
> >> +   * still have seen the old value of ->unplugged is guaranteed to have
> >> +   * finished.
> >> +   */
> >> +  dev->unplugged = true;
> >> +  synchronize_srcu(&drm_unplug_srcu);
> >> +
> >>dev->registered = false;
> >>  
> >>drm_client_dev_unregister(dev);
> >> diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
> >> index ca46a45a9cce..c50696c82a42 100644
> >> --- a/include/drm/drm_drv.h
> >> +++ b/include/drm/drm_drv.h
> >> @@ -736,13 +736,11 @@ void drm_dev_unplug(struct drm_device *dev);
> >>   * 

[bug report] drm/amd/display: Call into DC once per multiplane flip

2019-02-05 Thread Dan Carpenter
Hello David Francis,

This is a semi-automatic email about new static checker warnings.

The patch 8a48b44cd00f: "drm/amd/display: Call into DC once per 
multiplane flip" from Dec 11, 2018, leads to the following Smatch 
complaint:

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:4763 
amdgpu_dm_commit_planes()
warn: variable dereferenced before check 'acrtc_state->stream' (see line 
4748)

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c
  4747  
  4748  stream_status = 
dc_stream_get_status(acrtc_state->stream);
 
^^^
New unchecked dereference inside function.

  4749  if (!stream_status) {
  4750  DRM_ERROR("No stream status for CRTC: 
id=%d\n",
  4751  acrtc_attach->crtc_id);
  4752  continue;
  4753  }
  4754  
  4755  surface = stream_status->plane_states[0];
  4756  flip->surface_updates[flip_count].surface = 
surface;
  4757  if (!flip->surface_updates[flip_count].surface) 
{
  4758  DRM_ERROR("No surface for CRTC: 
id=%d\n",
  4759  acrtc_attach->crtc_id);
  4760  continue;
  4761  }
  4762  
  4763  if (acrtc_state->stream)
^^^
New check is too late.

  4764  update_freesync_state_on_stream(
  4765  dm,
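The fix the warning points at is simply moving the check ahead of the
dereference; a minimal sketch based on the excerpt above (not necessarily
the patch that actually landed):

	if (!acrtc_state->stream)
		continue;

	stream_status = dc_stream_get_status(acrtc_state->stream);
	if (!stream_status) {
		DRM_ERROR("No stream status for CRTC: id=%d\n",
			  acrtc_attach->crtc_id);
		continue;
	}

	surface = stream_status->plane_states[0];
	/* ... the acrtc_state->stream check at line 4763 is then redundant ... */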

regards,
dan carpenter