Re: [PATCH 2/6] drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads

2018-05-11 Thread Oded Gabbay
Hi Felix,
The patch looks fine to me and I can ACK it, but I would prefer that
Alex or Christian will review it as well before pushing it.
Thanks,
Oded

On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling  wrote:
> This commit allows amdgpu_ttm_tt_get_user_pages to work in a worker
> thread rather than regular process context. This will be used when
> KFD userptr BOs are restored after an MMU-notifier eviction.
>
> v2: Manage task reference with get_task_struct/put_task_struct
>
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 
> +
>  1 file changed, 29 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c2fae04..25490fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -689,7 +689,7 @@ struct amdgpu_ttm_tt {
> struct ttm_dma_tt   ttm;
> u64 offset;
> uint64_tuserptr;
> -   struct mm_struct*usermm;
> +   struct task_struct  *usertask;
> uint32_tuserflags;
> spinlock_t  guptasklock;
> struct list_headguptasks;
> @@ -700,14 +700,18 @@ struct amdgpu_ttm_tt {
>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>  {
> struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +   struct mm_struct *mm = gtt->usertask->mm;
> unsigned int flags = 0;
> unsigned pinned = 0;
> int r;
>
> +   if (!mm) /* Happens during process shutdown */
> +   return -ESRCH;
> +
> if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
> flags |= FOLL_WRITE;
>
> -   down_read(>mm->mmap_sem);
> +   down_read(>mmap_sem);
>
> if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
> /* check that we only use anonymous memory
> @@ -715,9 +719,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
> struct page **pages)
> unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> struct vm_area_struct *vma;
>
> -   vma = find_vma(gtt->usermm, gtt->userptr);
> +   vma = find_vma(mm, gtt->userptr);
> if (!vma || vma->vm_file || vma->vm_end < end) {
> -   up_read(>mm->mmap_sem);
> +   up_read(>mmap_sem);
> return -EPERM;
> }
> }
> @@ -733,7 +737,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
> struct page **pages)
> list_add(, >guptasks);
> spin_unlock(>guptasklock);
>
> -   r = get_user_pages(userptr, num_pages, flags, p, NULL);
> +   if (mm == current->mm)
> +   r = get_user_pages(userptr, num_pages, flags, p, 
> NULL);
> +   else
> +   r = get_user_pages_remote(gtt->usertask,
> +   mm, userptr, num_pages,
> +   flags, p, NULL, NULL);
>
> spin_lock(>guptasklock);
> list_del();
> @@ -746,12 +755,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
> struct page **pages)
>
> } while (pinned < ttm->num_pages);
>
> -   up_read(>mm->mmap_sem);
> +   up_read(>mmap_sem);
> return 0;
>
>  release_pages:
> release_pages(pages, pinned);
> -   up_read(>mm->mmap_sem);
> +   up_read(>mmap_sem);
> return r;
>  }
>
> @@ -972,6 +981,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
>  {
> struct amdgpu_ttm_tt *gtt = (void *)ttm;
>
> +   if (gtt->usertask)
> +   put_task_struct(gtt->usertask);
> +
> ttm_dma_tt_fini(>ttm);
> kfree(gtt);
>  }
> @@ -1072,8 +1084,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, 
> uint64_t addr,
> return -EINVAL;
>
> gtt->userptr = addr;
> -   gtt->usermm = current->mm;
> gtt->userflags = flags;
> +
> +   if (gtt->usertask)
> +   put_task_struct(gtt->usertask);
> +   gtt->usertask = current->group_leader;
> +   get_task_struct(gtt->usertask);
> +
> spin_lock_init(>guptasklock);
> INIT_LIST_HEAD(>guptasks);
> atomic_set(>mmu_invalidations, 0);
> @@ -1089,7 +1106,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct 
> ttm_tt *ttm)
> if (gtt == NULL)
> return NULL;
>
> -   return gtt->usermm;
> +   if (gtt->usertask == NULL)
> +   return NULL;
> +
> +   return gtt->usertask->mm;
>  }
>
>  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
> --
> 2.7.4
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org

Re: [PATCH 2/2] drm/amdgpu: set ttm bo priority before initialization

2018-05-11 Thread Christian König
Looks like a good idea to me as well. Reviewed-by: Christian König 
 for the series.


Regards,
Christian.

Am 11.05.2018 um 07:27 schrieb Zhou, David(ChunMing):

The series  is OK to me, Reviewed-by: Chunming  Zhou 
It is better to wait Christian to have a look  before pushing patch.

Regards,
David Zhou
-Original Message-
From: Junwei Zhang [mailto:jerry.zh...@amd.com]
Sent: Friday, May 11, 2018 12:58 PM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Cc: Koenig, Christian ; Zhou, David(ChunMing) 
; Zhang, Jerry 
Subject: [PATCH 2/2] drm/amdgpu: set ttm bo priority before initialization

Signed-off-by: Junwei Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 ++---
  1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e62153a..6a9e46a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -419,6 +419,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
  
  	bo->tbo.bdev = >mman.bdev;

amdgpu_ttm_placement_from_domain(bo, bp->domain);
+   if (bp->type == ttm_bo_type_kernel)
+   bo->tbo.priority = 1;
  
  	r = ttm_bo_init_reserved(>mman.bdev, >tbo, size, bp->type,

 >placement, page_align, , acc_size, @@ 
-434,9 +436,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
else
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
  
-	if (bp->type == ttm_bo_type_kernel)

-   bo->tbo.priority = 1;
-
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
struct dma_fence *fence;
--
1.9.1



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 3/6] drm/amdgpu: Avoid reclaim while holding locks taken in MMU notifier

2018-05-11 Thread Oded Gabbay
Hi Felix,
Same as patch 2. ACK but I want an extra review from amdgpu people.
Thanks,
Oded

On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling  wrote:
> When an MMU notifier runs in memory reclaim context, it can deadlock
> trying to take locks that are already held in the thread causing the
> memory reclaim. The solution is to avoid memory reclaim while holding
> locks that are taken in MMU notifiers.
>
> This commit fixes kmalloc while holding rmn->lock by moving the call
> outside the lock. The GFX MMU notifier also locks reservation objects.
> I have no good solution for avoiding reclaim while holding reservation
> objects. The HSA MMU notifier will not lock any reservation objects.
>
> v2: Moved allocation outside lock instead of using GFP_NOIO
>
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 17 +
>  1 file changed, 9 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index f2ed18e..83e344f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -380,7 +380,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned 
> long addr)
> enum amdgpu_mn_type type =
> bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
> struct amdgpu_mn *rmn;
> -   struct amdgpu_mn_node *node = NULL;
> +   struct amdgpu_mn_node *node = NULL, *new_node;
> struct list_head bos;
> struct interval_tree_node *it;
>
> @@ -388,6 +388,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned 
> long addr)
> if (IS_ERR(rmn))
> return PTR_ERR(rmn);
>
> +   new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
> +   if (!new_node)
> +   return -ENOMEM;
> +
> INIT_LIST_HEAD();
>
> down_write(>lock);
> @@ -401,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned 
> long addr)
> list_splice(>bos, );
> }
>
> -   if (!node) {
> -   node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
> -   if (!node) {
> -   up_write(>lock);
> -   return -ENOMEM;
> -   }
> -   }
> +   if (!node)
> +   node = new_node;
> +   else
> +   kfree(new_node);
>
> bo->mn = rmn;
>
> --
> 2.7.4
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/6] drm/amdkfd: GFP_NOIO while holding locks taken in MMU notifier

2018-05-11 Thread Oded Gabbay
On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling  wrote:
> When an MMU notifier runs in memory reclaim context, it can deadlock
> trying to take locks that are already held in the thread causing the
> memory reclaim. The solution is to avoid memory reclaim while holding
> locks that are taken in MMU notifiers by using GFP_NOIO.

Which locks are problematic ?

The kernel recommendation is to use "memalloc_noio_{save,restore} to
mark the whole scope which cannot perform any IO with a short
explanation why"

By using the scope functions, you protect against future allocation
code that will be written in the critical path, without worrying about
the developer using the correct GFP_NOIO flag.

Oded

>
> This commit fixes memory allocations done while holding the dqm->lock
> which is needed in the MMU notifier (dqm->ops.evict_process_queues).
>
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c  | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  | 2 +-
>  3 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 334669996..0434f65 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -652,7 +652,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int 
> size,
> if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
> return -ENOMEM;
>
> -   *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
> +   *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
> if ((*mem_obj) == NULL)
> return -ENOMEM;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> index c00c325..2bc49c6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> @@ -412,7 +412,7 @@ struct mqd_manager *mqd_manager_init_cik(enum 
> KFD_MQD_TYPE type,
> if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
> return NULL;
>
> -   mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
> +   mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
> if (!mqd)
> return NULL;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> index 89e4242..481307b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> @@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE 
> type,
> if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
> return NULL;
>
> -   mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
> +   mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
> if (!mqd)
> return NULL;
>
> --
> 2.7.4
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/6] Userptr memory mapping support for KFD

2018-05-11 Thread Oded Gabbay
On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling  wrote:
> Update of remaining patches from the GPUVM patch series. This should apply
> on top of the fixes I just sent out.
>
> Felix Kuehling (6):
>   drm/amdgpu: Add MMU notifier type for KFD userptr
>   drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads
>   drm/amdgpu: Avoid reclaim while holding locks taken in MMU notifier
>   drm/amdkfd: GFP_NOIO while holding locks taken in MMU notifier
>   drm/amdkfd: Add quiesce_mm and resume_mm to kgd2kfd_calls
>   drm/amdgpu: Add userptr support for KFD
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   |  12 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 572 
> ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c   | 111 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h   |  11 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  |  38 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c  |  40 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_module.c  |   2 +
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c |   2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  |   2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   4 +
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c |  10 +-
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h  |   6 +
>  13 files changed, 746 insertions(+), 66 deletions(-)
>
> --
> 2.7.4
>

Hi Felix,

Patch 1, 5 & 6 are reviewed by me.
Patches 2 & 3 are fine but I would like approval from Alex/Christian
Patch 4 - I think we can do better. See my comments there.

Thanks,
Oded
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/6] drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads

2018-05-11 Thread Oded Gabbay
Adding Alex, Christian

On Fri, May 11, 2018 at 10:36 AM, Oded Gabbay  wrote:
> Hi Felix,
> The patch looks fine to me and I can ACK it, but I would prefer that
> Alex or Christian will review it as well before pushing it.
> Thanks,
> Oded
>
> On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling  
> wrote:
>> This commit allows amdgpu_ttm_tt_get_user_pages to work in a worker
>> thread rather than regular process context. This will be used when
>> KFD userptr BOs are restored after an MMU-notifier eviction.
>>
>> v2: Manage task reference with get_task_struct/put_task_struct
>>
>> Signed-off-by: Felix Kuehling 
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 
>> +
>>  1 file changed, 29 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c2fae04..25490fe 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -689,7 +689,7 @@ struct amdgpu_ttm_tt {
>> struct ttm_dma_tt   ttm;
>> u64 offset;
>> uint64_tuserptr;
>> -   struct mm_struct*usermm;
>> +   struct task_struct  *usertask;
>> uint32_tuserflags;
>> spinlock_t  guptasklock;
>> struct list_headguptasks;
>> @@ -700,14 +700,18 @@ struct amdgpu_ttm_tt {
>>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>  {
>> struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +   struct mm_struct *mm = gtt->usertask->mm;
>> unsigned int flags = 0;
>> unsigned pinned = 0;
>> int r;
>>
>> +   if (!mm) /* Happens during process shutdown */
>> +   return -ESRCH;
>> +
>> if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
>> flags |= FOLL_WRITE;
>>
>> -   down_read(>mm->mmap_sem);
>> +   down_read(>mmap_sem);
>>
>> if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
>> /* check that we only use anonymous memory
>> @@ -715,9 +719,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
>> struct page **pages)
>> unsigned long end = gtt->userptr + ttm->num_pages * 
>> PAGE_SIZE;
>> struct vm_area_struct *vma;
>>
>> -   vma = find_vma(gtt->usermm, gtt->userptr);
>> +   vma = find_vma(mm, gtt->userptr);
>> if (!vma || vma->vm_file || vma->vm_end < end) {
>> -   up_read(>mm->mmap_sem);
>> +   up_read(>mmap_sem);
>> return -EPERM;
>> }
>> }
>> @@ -733,7 +737,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
>> struct page **pages)
>> list_add(, >guptasks);
>> spin_unlock(>guptasklock);
>>
>> -   r = get_user_pages(userptr, num_pages, flags, p, NULL);
>> +   if (mm == current->mm)
>> +   r = get_user_pages(userptr, num_pages, flags, p, 
>> NULL);
>> +   else
>> +   r = get_user_pages_remote(gtt->usertask,
>> +   mm, userptr, num_pages,
>> +   flags, p, NULL, NULL);
>>
>> spin_lock(>guptasklock);
>> list_del();
>> @@ -746,12 +755,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
>> struct page **pages)
>>
>> } while (pinned < ttm->num_pages);
>>
>> -   up_read(>mm->mmap_sem);
>> +   up_read(>mmap_sem);
>> return 0;
>>
>>  release_pages:
>> release_pages(pages, pinned);
>> -   up_read(>mm->mmap_sem);
>> +   up_read(>mmap_sem);
>> return r;
>>  }
>>
>> @@ -972,6 +981,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt 
>> *ttm)
>>  {
>> struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>
>> +   if (gtt->usertask)
>> +   put_task_struct(gtt->usertask);
>> +
>> ttm_dma_tt_fini(>ttm);
>> kfree(gtt);
>>  }
>> @@ -1072,8 +1084,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, 
>> uint64_t addr,
>> return -EINVAL;
>>
>> gtt->userptr = addr;
>> -   gtt->usermm = current->mm;
>> gtt->userflags = flags;
>> +
>> +   if (gtt->usertask)
>> +   put_task_struct(gtt->usertask);
>> +   gtt->usertask = current->group_leader;
>> +   get_task_struct(gtt->usertask);
>> +
>> spin_lock_init(>guptasklock);
>> INIT_LIST_HEAD(>guptasks);
>> atomic_set(>mmu_invalidations, 0);
>> @@ -1089,7 +1106,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct 
>> ttm_tt *ttm)
>> if (gtt == NULL)
>> return NULL;
>>
>> -   return gtt->usermm;
>> +   if (gtt->usertask == NULL)
>> +   return NULL;
>> +

Re: [PATCH 11/21] drm/amdkfd: Add GFXv9 MQD manager

2018-05-11 Thread Oded Gabbay
On Wed, Apr 11, 2018 at 12:33 AM, Felix Kuehling  wrote:
> Signed-off-by: John Bridgman 
> Signed-off-by: Jay Cornwall 
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/Makefile |   1 +
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c |   2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c|   3 +
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 443 
> 
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   3 +
>  5 files changed, 451 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
> b/drivers/gpu/drm/amd/amdkfd/Makefile
> index 52b3c1b..094b591 100644
> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
> @@ -30,6 +30,7 @@ amdkfd-y  := kfd_module.o kfd_device.o kfd_chardev.o 
> kfd_topology.o \
> kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
> kfd_process.o kfd_queue.o kfd_mqd_manager.o \
> kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
> +   kfd_mqd_manager_v9.o \
> kfd_kernel_queue.o kfd_kernel_queue_cik.o \
> kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
> kfd_packet_manager.o kfd_process_queue_manager.o \
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index f563acb..c368ce3 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -700,7 +700,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int 
> size,
> if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
> return -ENOMEM;
>
> -   *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
> +   *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
This assumes the patch in the userptr patch-set is applied. I changed
it to GFP_KERNEL for now.

> if ((*mem_obj) == NULL)
> return -ENOMEM;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> index ee7061e..4b8eb50 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> @@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
> case CHIP_POLARIS10:
> case CHIP_POLARIS11:
> return mqd_manager_init_vi_tonga(type, dev);
> +   case CHIP_VEGA10:
> +   case CHIP_RAVEN:
> +   return mqd_manager_init_v9(type, dev);
> default:
> WARN(1, "Unexpected ASIC family %u",
>  dev->device_info->asic_family);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> new file mode 100644
> index 000..684054f
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> @@ -0,0 +1,443 @@
> +/*
> + * Copyright 2016-2018 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include "kfd_priv.h"
> +#include "kfd_mqd_manager.h"
> +#include "v9_structs.h"
> +#include "gc/gc_9_0_offset.h"
> +#include "gc/gc_9_0_sh_mask.h"
> +#include "sdma0/sdma0_4_0_sh_mask.h"
> +
> +static inline struct v9_mqd *get_mqd(void *mqd)
> +{
> +   return (struct v9_mqd *)mqd;
> +}
> +
> +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
> +{
> +   return (struct v9_sdma_mqd *)mqd;
> +}
> +
> +static int init_mqd(struct mqd_manager *mm, void **mqd,
> +   struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
> +   struct queue_properties *q)
> +{
> +   int 

[PATCH] drm/amdgpu: fix null pointer for bo unmap trace function

2018-05-11 Thread Junwei Zhang
Signed-off-by: Junwei Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 532263a..e96e26d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -275,7 +275,7 @@
 ),
 
TP_fast_assign(
-  __entry->bo = bo_va->base.bo;
+  __entry->bo = bo_va ? bo_va->base.bo : NULL;
   __entry->start = mapping->start;
   __entry->last = mapping->last;
   __entry->offset = mapping->offset;
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 07/21] drm/amdkfd: Clean up KFD_MMAP_ offset handling

2018-05-11 Thread Oded Gabbay
On Wed, Apr 11, 2018 at 12:33 AM, Felix Kuehling  wrote:
> From: Harish Kasiviswanathan 
>
> Use bit-rotate for better clarity and remove _MASK from the #defines as
> these represent mmap types.
>
> Centralize all the parsing of the mmap offset in kfd_mmap and add device
> parameter to doorbell and reserved_mem map functions.
>
> Encode gpu_id into upper bits of vm_pgoff. This frees up the lower bits
> for encoding the the doorbell ID on Vega10.
>
> Signed-off-by: Harish Kasiviswanathan 
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 35 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c |  9 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c   |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 38 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  8 +++
>  5 files changed, 59 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index b5e5f0e..f6b35f4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -292,7 +292,8 @@ static int kfd_ioctl_create_queue(struct file *filep, 
> struct kfd_process *p,
>
>
> /* Return gpu_id as doorbell offset for mmap usage */
> -   args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
> +   args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
> +   args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
> args->doorbell_offset <<= PAGE_SHIFT;
>
> mutex_unlock(>mutex);
> @@ -1645,23 +1646,33 @@ static long kfd_ioctl(struct file *filep, unsigned 
> int cmd, unsigned long arg)
>  static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
>  {
> struct kfd_process *process;
> +   struct kfd_dev *dev = NULL;
> +   unsigned long vm_pgoff;
> +   unsigned int gpu_id;
>
> process = kfd_get_process(current);
> if (IS_ERR(process))
> return PTR_ERR(process);
>
> -   if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
> -   KFD_MMAP_DOORBELL_MASK) {
> -   vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
> -   return kfd_doorbell_mmap(process, vma);
> -   } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
> -   KFD_MMAP_EVENTS_MASK) {
> -   vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
> +   vm_pgoff = vma->vm_pgoff;
> +   vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
> +   gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
> +   if (gpu_id)
> +   dev = kfd_device_by_id(gpu_id);
> +
> +   switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
> +   case KFD_MMAP_TYPE_DOORBELL:
> +   if (!dev)
> +   return -ENODEV;
> +   return kfd_doorbell_mmap(dev, process, vma);
> +
> +   case KFD_MMAP_TYPE_EVENTS:
> return kfd_event_mmap(process, vma);
> -   } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
> -   KFD_MMAP_RESERVED_MEM_MASK) {
> -   vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
> -   return kfd_reserved_mem_mmap(process, vma);
> +
> +   case KFD_MMAP_TYPE_RESERVED_MEM:
> +   if (!dev)
> +   return -ENODEV;
> +   return kfd_reserved_mem_mmap(dev, process, vma);
> }
>
> return -EFAULT;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index 4840314..efc59de 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -126,15 +126,10 @@ void kfd_doorbell_fini(struct kfd_dev *kfd)
> iounmap(kfd->doorbell_kernel_ptr);
>  }
>
> -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct 
> *vma)
> +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
> + struct vm_area_struct *vma)
>  {
> phys_addr_t address;
> -   struct kfd_dev *dev;
> -
> -   /* Find kfd device according to gpu id */
> -   dev = kfd_device_by_id(vma->vm_pgoff);
> -   if (!dev)
> -   return -EINVAL;
>
> /*
>  * For simplicitly we only allow mapping of the entire doorbell
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index 4890a90..bccf2f7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct 
> kfd_process *p,
> case KFD_EVENT_TYPE_DEBUG:
> ret = create_signal_event(devkfd, p, ev);
> if (!ret) {
> -   

Re: GPU hang trying to run OpenCL kernels on x86_64

2018-05-11 Thread Luís Mendes
Hi Michel,

Just made a fresh Ubuntu 18.04 LTS install for armhf with ubuntu
default mesa, libdrm and libclc, with the same 4.16.7 kernel and it
runs fine with the OpenCL applications that cause the hang on x86_64.
So it is surely a kernel or mesa issue exclusive to x86_64, libraries
versions are the same on both systems now, as well as the kernel.
I have even tried amdgpu-pro-18.20 with ubuntu 18.04 on x86_64 and it
hangs just the same.

Regards,
Luís

On Mon, May 7, 2018 at 1:31 PM, Luís Mendes  wrote:
> Hi Michel,
>
> No, I haven't tried the exact same versions of Mesa, nor libclc, nor
> LLVM on x86, however on armhf I have been able to use several
> combinations of Linux kernel versions and mesa versions which are able
> to run the OpenCL kernels in question. I will try to see if the same
> happens with the exact same Ubuntu 18.04 LTS base installation on
> armhf, but won't be able to test it today.
>
> Luís
>
> On Mon, May 7, 2018 at 10:38 AM, Michel Dänzer  wrote:
>> On 2018-05-05 01:15 AM, Luís Mendes wrote:
>>>
>>> - On another system with armhf 32 bits, 1GB ram, 512GB SSD, AMD RX 480
>>> or AMD RX 550
>>>   with Ubuntu 17.10, vanilla kernel 4.16.7, mesa-18.0.2,
>>> libdrm-2.4.92-git, libclc-git at commit
>>> 3d994f2ff2cbb4531223fe2657144cb19f0c5328 (15/Nov/2017)
>>>
>>>   The kernels work properly on the same AMD cards.
>>
>> Have you tried those exact same versions of the kernel, Mesa, libclc and
>> LLVM on x86?
>>
>>
>> --
>> Earthling Michel Dänzer   |   http://www.amd.com
>> Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: fix null pointer for bo unmap trace function

2018-05-11 Thread Christian König

Am 11.05.2018 um 11:30 schrieb Junwei Zhang:

Signed-off-by: Junwei Zhang 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 532263a..e96e26d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -275,7 +275,7 @@
 ),
  
  	TP_fast_assign(

-  __entry->bo = bo_va->base.bo;
+  __entry->bo = bo_va ? bo_va->base.bo : NULL;
   __entry->start = mapping->start;
   __entry->last = mapping->last;
   __entry->offset = mapping->offset;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 3/6] drm/amdgpu: Avoid reclaim while holding locks taken in MMU notifier

2018-05-11 Thread Christian König

Hi Oded,

it looks sane to me and is Reviewed-by: Christian König 
.


Christian.

Am 11.05.2018 um 09:38 schrieb Oded Gabbay:

Hi Felix,
Same as patch 2. ACK but I want an extra review from amdgpu people.
Thanks,
Oded

On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling  wrote:

When an MMU notifier runs in memory reclaim context, it can deadlock
trying to take locks that are already held in the thread causing the
memory reclaim. The solution is to avoid memory reclaim while holding
locks that are taken in MMU notifiers.

This commit fixes kmalloc while holding rmn->lock by moving the call
outside the lock. The GFX MMU notifier also locks reservation objects.
I have no good solution for avoiding reclaim while holding reservation
objects. The HSA MMU notifier will not lock any reservation objects.

v2: Moved allocation outside lock instead of using GFP_NOIO

Signed-off-by: Felix Kuehling 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 17 +
  1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index f2ed18e..83e344f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -380,7 +380,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long 
addr)
 enum amdgpu_mn_type type =
 bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
 struct amdgpu_mn *rmn;
-   struct amdgpu_mn_node *node = NULL;
+   struct amdgpu_mn_node *node = NULL, *new_node;
 struct list_head bos;
 struct interval_tree_node *it;

@@ -388,6 +388,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long 
addr)
 if (IS_ERR(rmn))
 return PTR_ERR(rmn);

+   new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
+   if (!new_node)
+   return -ENOMEM;
+
 INIT_LIST_HEAD();

 down_write(>lock);
@@ -401,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned 
long addr)
 list_splice(>bos, );
 }

-   if (!node) {
-   node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
-   if (!node) {
-   up_write(>lock);
-   return -ENOMEM;
-   }
-   }
+   if (!node)
+   node = new_node;
+   else
+   kfree(new_node);

 bo->mn = rmn;

--
2.7.4



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: drop printing the BO offset in the gem debugfs

2018-05-11 Thread Christian König
It is meaningless anyway.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7d3dc229fa47..c3f8c5573a97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -803,10 +803,6 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, 
void *data)
seq_printf(m, "\t0x%08x: %12ld byte %s",
   id, amdgpu_bo_size(bo), placement);
 
-   offset = READ_ONCE(bo->tbo.mem.start);
-   if (offset != AMDGPU_BO_INVALID_OFFSET)
-   seq_printf(m, " @ 0x%010Lx", offset);
-
pin_count = READ_ONCE(bo->pin_count);
if (pin_count)
seq_printf(m, " pin count %d", pin_count);
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: print the BO flags in the gem debugfs entry

2018-05-11 Thread Christian König
Quite usfull to know.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index c3f8c5573a97..b09030167c44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -774,6 +774,12 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 }
 
 #if defined(CONFIG_DEBUG_FS)
+
+#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag)  \
+   if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \
+   seq_printf((m), " " #flag); \
+   }
+
 static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
 {
struct drm_gem_object *gobj = ptr;
@@ -815,6 +821,15 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, 
void *data)
else if (dma_buf)
seq_printf(m, " exported as %p", dma_buf);
 
+   amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
+   amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS);
+   amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC);
+   amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED);
+   amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW);
+   amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
+   amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID);
+   amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC);
+
seq_printf(m, "\n");
 
return 0;
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: print the BO flags in the gem debugfs entry

2018-05-11 Thread Alex Deucher
On Fri, May 11, 2018 at 11:40 AM, Christian König
 wrote:
> Quite usfull to know.

"useful"

With that fixed:
Reviewed-by: Alex Deucher 

>
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 15 +++
>  1 file changed, 15 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index c3f8c5573a97..b09030167c44 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -774,6 +774,12 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>  }
>
>  #if defined(CONFIG_DEBUG_FS)
> +
> +#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag)  \
> +   if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \
> +   seq_printf((m), " " #flag); \
> +   }
> +
>  static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
>  {
> struct drm_gem_object *gobj = ptr;
> @@ -815,6 +821,15 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, 
> void *data)
> else if (dma_buf)
> seq_printf(m, " exported as %p", dma_buf);
>
> +   amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
> +   amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS);
> +   amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC);
> +   amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED);
> +   amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW);
> +   amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
> +   amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID);
> +   amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC);
> +
> seq_printf(m, "\n");
>
> return 0;
> --
> 2.14.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 07/21] drm/amdkfd: Clean up KFD_MMAP_ offset handling

2018-05-11 Thread Felix Kuehling
On 2018-05-11 04:52 AM, Oded Gabbay wrote:
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -41,9 +41,33 @@
>>
>>  #define KFD_SYSFS_FILE_MODE 0444
>>
>> -#define KFD_MMAP_DOORBELL_MASK 0x8ull
>> -#define KFD_MMAP_EVENTS_MASK 0x4ull
>> -#define KFD_MMAP_RESERVED_MEM_MASK 0x2ull
>> +/* GPU ID hash width in bits */
>> +#define KFD_GPU_ID_HASH_WIDTH 16
>> +
>> +/* Use upper bits of mmap offset to store KFD driver specific information.
>> + * BITS[63:62] - Encode MMAP type
>> + * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs 
>> to
>> + * BITS[45:0]  - MMAP offset value
>> + *
>> + * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
>> + *  defines are w.r.t to PAGE_SIZE
>> + */
>> +#define KFD_MMAP_TYPE_SHIFT(62 - PAGE_SHIFT)
>> +#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
>> +#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
>> +#define KFD_MMAP_TYPE_EVENTS   (0x2ULL << KFD_MMAP_TYPE_SHIFT)
>> +#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
> Isn't this new definition breaks existing user-space library (kfd thunk) ?
> If that is the case we have a problem here.

No, this does not break user mode, because user mode isn't aware of
these definitions at all. The mmap offset comes from kernel mode and is
opaque to user mode. User mode just passes it back down to kernel mode
in the offset parameter of mmap. So the kernel can change the encoding
of the mmap offset without breaking user mode.

Regards,
  Felix

>
> Oded
>
>

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu: drop printing the BO offset in the gem debugfs

2018-05-11 Thread Alex Deucher
On Fri, May 11, 2018 at 11:40 AM, Christian König
 wrote:
> It is meaningless anyway.
>
> Signed-off-by: Christian König 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 
>  1 file changed, 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 7d3dc229fa47..c3f8c5573a97 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -803,10 +803,6 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, 
> void *data)
> seq_printf(m, "\t0x%08x: %12ld byte %s",
>id, amdgpu_bo_size(bo), placement);
>
> -   offset = READ_ONCE(bo->tbo.mem.start);
> -   if (offset != AMDGPU_BO_INVALID_OFFSET)
> -   seq_printf(m, " @ 0x%010Lx", offset);
> -
> pin_count = READ_ONCE(bo->pin_count);
> if (pin_count)
> seq_printf(m, " pin count %d", pin_count);
> --
> 2.14.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu/vi: don't enable DCE on a specific Polaris sku

2018-05-11 Thread Alex Deucher
It's fused off.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vi.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 4ac1288ab7df..4f844beba0db 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1650,14 +1650,18 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, _v8_1_ip_block);
amdgpu_device_ip_block_add(adev, _ih_ip_block);
amdgpu_device_ip_block_add(adev, _smu_ip_block);
-   if (adev->enable_virtual_display)
+   if (adev->enable_virtual_display) {
amdgpu_device_ip_block_add(adev, _virtual_ip_block);
+   } else if ((adev->pdev->device == 0x67df) &&
+  (adev->pdev->revision == 0xe0)) {
+   /* no display block, don't add the IP block */
 #if defined(CONFIG_DRM_AMD_DC)
-   else if (amdgpu_device_has_dc_support(adev))
-   amdgpu_device_ip_block_add(adev, _ip_block);
+   } else if (amdgpu_device_has_dc_support(adev)) {
+   amdgpu_device_ip_block_add(adev, _ip_block);
 #endif
-   else
+   } else {
amdgpu_device_ip_block_add(adev, _v11_2_ip_block);
+   }
amdgpu_device_ip_block_add(adev, _v8_0_ip_block);
amdgpu_device_ip_block_add(adev, _v3_1_ip_block);
amdgpu_device_ip_block_add(adev, _v6_3_ip_block);
-- 
2.13.6

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Fix underflow on Hawaii on 18.20 release branch

2018-05-11 Thread mikita.lipski
Thanks
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Fix display corruption on CI with dpm enabled

2018-05-11 Thread mikita.lipski
From: Rex Zhu 

with dpm enabled, need to get active crtcs in dc/no-dc mode.

caused by
'commit ebb649667a31 ("drm/amdgpu: Set pm_display_cfg in non-dc mode")'

Reviewed-by: Huang Rui 
Signed-off-by: Rex Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 2c82126..b455da4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1878,26 +1878,26 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device 
*adev)
amdgpu_fence_wait_empty(ring);
}
 
-   if (!amdgpu_device_has_dc_support(adev)) {
-   mutex_lock(>pm.mutex);
-   amdgpu_dpm_get_active_displays(adev);
-   adev->pm.pm_display_cfg.num_display = 
adev->pm.dpm.new_active_crtcs;
-   adev->pm.pm_display_cfg.vrefresh = 
amdgpu_dpm_get_vrefresh(adev);
-   adev->pm.pm_display_cfg.min_vblank_time = 
amdgpu_dpm_get_vblank_time(adev);
-   /* we have issues with mclk switching with refresh rates over 
120 hz on the non-DC code. */
-   if (adev->pm.pm_display_cfg.vrefresh > 120)
-   adev->pm.pm_display_cfg.min_vblank_time = 0;
-   if (adev->powerplay.pp_funcs->display_configuration_change)
-   adev->powerplay.pp_funcs->display_configuration_change(
-   
adev->powerplay.pp_handle,
-   
>pm.pm_display_cfg);
-   mutex_unlock(>pm.mutex);
-   }
-
if (adev->powerplay.pp_funcs->dispatch_tasks) {
+   if (!amdgpu_device_has_dc_support(adev)) {
+   mutex_lock(>pm.mutex);
+   amdgpu_dpm_get_active_displays(adev);
+   adev->pm.pm_display_cfg.num_display = 
adev->pm.dpm.new_active_crtcs;
+   adev->pm.pm_display_cfg.vrefresh = 
amdgpu_dpm_get_vrefresh(adev);
+   adev->pm.pm_display_cfg.min_vblank_time = 
amdgpu_dpm_get_vblank_time(adev);
+   /* we have issues with mclk switching with refresh 
rates over 120 hz on the non-DC code. */
+   if (adev->pm.pm_display_cfg.vrefresh > 120)
+   adev->pm.pm_display_cfg.min_vblank_time = 0;
+   if 
(adev->powerplay.pp_funcs->display_configuration_change)
+   
adev->powerplay.pp_funcs->display_configuration_change(
+   
adev->powerplay.pp_handle,
+   
>pm.pm_display_cfg);
+   mutex_unlock(>pm.mutex);
+   }
amdgpu_dpm_dispatch_task(adev, 
AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL);
} else {
mutex_lock(>pm.mutex);
+   amdgpu_dpm_get_active_displays(adev);
/* update battery/ac status */
if (power_supply_is_system_supplied() > 0)
adev->pm.dpm.ac_power = true;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 4/4] drm/nouveau: Switch to the generic underscan props

2018-05-11 Thread Boris Brezillon
Now that underscan props can be parsed by the core and assigned to
conn_state->underscan.xxx, we can rely on this implementation and get
rid of the nouveau-specific underscan props.

Signed-off-by: Boris Brezillon 
---
 drivers/gpu/drm/nouveau/nouveau_connector.c | 39 +
 drivers/gpu/drm/nouveau/nouveau_connector.h |  9 ---
 drivers/gpu/drm/nouveau/nouveau_display.c   | 14 ---
 drivers/gpu/drm/nouveau/nouveau_display.h   |  3 ---
 drivers/gpu/drm/nouveau/nv50_display.c  | 17 +
 5 files changed, 18 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c 
b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 6ed9cb053dfa..0ce055d3b89e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -105,12 +105,6 @@ nouveau_conn_atomic_get_property(struct drm_connector 
*connector,
 
if (property == dev->mode_config.scaling_mode_property)
*val = asyc->scaler.mode;
-   else if (property == disp->underscan_property)
-   *val = asyc->scaler.underscan.mode;
-   else if (property == disp->underscan_hborder_property)
-   *val = asyc->scaler.underscan.hborder;
-   else if (property == disp->underscan_vborder_property)
-   *val = asyc->scaler.underscan.vborder;
else if (property == disp->dithering_mode)
*val = asyc->dither.mode;
else if (property == disp->dithering_depth)
@@ -170,24 +164,6 @@ nouveau_conn_atomic_set_property(struct drm_connector 
*connector,
asyc->set.scaler = true;
}
} else
-   if (property == disp->underscan_property) {
-   if (asyc->scaler.underscan.mode != val) {
-   asyc->scaler.underscan.mode = val;
-   asyc->set.scaler = true;
-   }
-   } else
-   if (property == disp->underscan_hborder_property) {
-   if (asyc->scaler.underscan.hborder != val) {
-   asyc->scaler.underscan.hborder = val;
-   asyc->set.scaler = true;
-   }
-   } else
-   if (property == disp->underscan_vborder_property) {
-   if (asyc->scaler.underscan.vborder != val) {
-   asyc->scaler.underscan.vborder = val;
-   asyc->set.scaler = true;
-   }
-   } else
if (property == disp->dithering_mode) {
if (asyc->dither.mode != val) {
asyc->dither.mode = val;
@@ -256,7 +232,6 @@ nouveau_conn_reset(struct drm_connector *connector)
asyc->dither.mode = DITHERING_MODE_AUTO;
asyc->dither.depth = DITHERING_DEPTH_AUTO;
asyc->scaler.mode = DRM_MODE_SCALE_NONE;
-   asyc->scaler.underscan.mode = UNDERSCAN_OFF;
asyc->procamp.color_vibrance = 150;
asyc->procamp.vibrant_hue = 90;
 
@@ -285,18 +260,16 @@ nouveau_conn_attach_properties(struct drm_connector 
*connector)
   dvi_i_subconnector_property, 0);
 
/* Add overscan compensation options to digital outputs. */
-   if (disp->underscan_property &&
+   if (disp->disp.oclass >= NV50_DISP &&
(connector->connector_type == DRM_MODE_CONNECTOR_DVID ||
 connector->connector_type == DRM_MODE_CONNECTOR_DVII ||
 connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
 connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort)) {
-   drm_object_attach_property(>base,
-  disp->underscan_property,
-  UNDERSCAN_OFF);
-   drm_object_attach_property(>base,
-  disp->underscan_hborder_property, 0);
-   drm_object_attach_property(>base,
-  disp->underscan_vborder_property, 0);
+   WARN_ON(drm_connector_attach_underscan_properties(connector,
+   BIT(DRM_UNDERSCAN_OFF) |
+   BIT(DRM_UNDERSCAN_ON) |
+   BIT(DRM_UNDERSCAN_AUTO),
+   128, 128));
}
 
/* Add hue and saturation options. */
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h 
b/drivers/gpu/drm/nouveau/nouveau_connector.h
index a4d1a059bd3d..1d3ec65288e1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h
@@ -111,15 +111,6 @@ struct nouveau_conn_atom {
 
struct {
int mode;   /* DRM_MODE_SCALE_* */
-   struct {
-   enum {
-   UNDERSCAN_OFF,
-   UNDERSCAN_ON,
-   

[PATCH v2 2/4] drm/vc4: Take underscan setup into account when updating planes

2018-05-11 Thread Boris Brezillon
Applying an underscan setup is just a matter of scaling all planes
appropriately and adjusting the CRTC X/Y offset to account for the
horizontal and vertical border.

Create an vc4_plane_underscan_adj() function doing that and call it from
vc4_plane_setup_clipping_and_scaling() so that we are ready to attach
underscan properties to the HDMI connector.

Signed-off-by: Boris Brezillon 
---
Changes in v2:
- Take changes on hborder/vborder meaning into account
---
 drivers/gpu/drm/vc4/vc4_plane.c | 49 -
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 71d44c357d35..61ed60841cd6 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -258,6 +258,49 @@ static u32 vc4_get_scl_field(struct drm_plane_state 
*state, int plane)
}
 }
 
+static int vc4_plane_underscan_adj(struct drm_plane_state *pstate)
+{
+   struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
+   struct drm_connector_state *conn_state = NULL;
+   struct drm_connector *conn;
+   struct drm_crtc_state *crtc_state;
+   int i;
+
+   for_each_new_connector_in_state(pstate->state, conn, conn_state, i) {
+   if (conn_state->crtc == pstate->crtc)
+   break;
+   }
+
+   if (i == pstate->state->num_connector)
+   return 0;
+
+   if (conn_state->underscan.mode != DRM_UNDERSCAN_ON)
+   return 0;
+
+   crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
+  pstate->crtc);
+
+   if (conn_state->underscan.hborder >= crtc_state->mode.hdisplay ||
+   conn_state->underscan.vborder >= crtc_state->mode.vdisplay)
+   return -EINVAL;
+
+   vc4_pstate->crtc_x += conn_state->underscan.hborder;
+   vc4_pstate->crtc_y += conn_state->underscan.vborder;
+   vc4_pstate->crtc_w = (vc4_pstate->crtc_w *
+ (crtc_state->mode.hdisplay -
+  (conn_state->underscan.hborder * 2))) /
+crtc_state->mode.hdisplay;
+   vc4_pstate->crtc_h = (vc4_pstate->crtc_h *
+ (crtc_state->mode.vdisplay -
+  (conn_state->underscan.vborder * 2))) /
+crtc_state->mode.vdisplay;
+
+   if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
+   return -EINVAL;
+
+   return 0;
+}
+
 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 {
struct drm_plane *plane = state->plane;
@@ -269,7 +312,7 @@ static int vc4_plane_setup_clipping_and_scaling(struct 
drm_plane_state *state)
int num_planes = fb->format->num_planes;
u32 h_subsample = 1;
u32 v_subsample = 1;
-   int i;
+   int i, ret;
 
for (i = 0; i < num_planes; i++)
vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
@@ -292,6 +335,10 @@ static int vc4_plane_setup_clipping_and_scaling(struct 
drm_plane_state *state)
vc4_state->crtc_w = state->crtc_w;
vc4_state->crtc_h = state->crtc_h;
 
+   ret = vc4_plane_underscan_adj(state);
+   if (ret)
+   return ret;
+
vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
   vc4_state->crtc_w);
vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 3/4] drm/vc4: Attach underscan props to the HDMI connector

2018-05-11 Thread Boris Brezillon
Now that the plane code takes the underscan setup into account, we can
safely attach the underscan props to the HDMI connector.

We also take care of filling AVI infoframes correctly to expose the
top/botton/left/right bar.

Note that these underscan props match pretty well the
overscan_{left,right,top,bottom} properties defined in config.txt and
parsed by the VC4 firmware.

Signed-off-by: Boris Brezillon 
---
Changes in v2:
- none
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 1a6db291d48b..17464b5981f9 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -323,6 +323,16 @@ static struct drm_connector 
*vc4_hdmi_connector_init(struct drm_device *dev,
   DRM_MODE_CONNECTOR_HDMIA);
drm_connector_helper_add(connector, _hdmi_connector_helper_funcs);
 
+   /* The hborder and vborder limit is arbitrarily set to 1024 which
+* should be more than enough for real use cases. Note that the actual
+* limitation comes from the display mode:
+*  hborder < hdisplay && vborder < vdisplay
+*/
+   drm_connector_attach_underscan_properties(connector,
+ BIT(DRM_UNDERSCAN_OFF) |
+ BIT(DRM_UNDERSCAN_ON),
+ 1024, 1024);
+
connector->polled = (DRM_CONNECTOR_POLL_CONNECT |
 DRM_CONNECTOR_POLL_DISCONNECT);
 
@@ -408,6 +418,9 @@ static void vc4_hdmi_write_infoframe(struct drm_encoder 
*encoder,
 static void vc4_hdmi_set_avi_infoframe(struct drm_encoder *encoder)
 {
struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder);
+   struct vc4_dev *vc4 = encoder->dev->dev_private;
+   struct vc4_hdmi *hdmi = vc4->hdmi;
+   struct drm_connector_state *cstate = hdmi->connector->state;
struct drm_crtc *crtc = encoder->crtc;
const struct drm_display_mode *mode = >state->adjusted_mode;
union hdmi_infoframe frame;
@@ -426,6 +439,18 @@ static void vc4_hdmi_set_avi_infoframe(struct drm_encoder 
*encoder)
   vc4_encoder->rgb_range_selectable,
   false);
 
+   if (cstate->underscan.mode == DRM_UNDERSCAN_ON) {
+   if (cstate->underscan.hborder) {
+   frame.avi.right_bar = cstate->underscan.hborder / 2;
+   frame.avi.left_bar = frame.avi.right_bar;
+   }
+
+   if (cstate->underscan.vborder) {
+   frame.avi.top_bar = cstate->underscan.vborder / 2;
+   frame.avi.bottom_bar = frame.avi.top_bar;
+   }
+   }
+
vc4_hdmi_write_infoframe(encoder, );
 }
 
-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 0/4] drm/connector: Provide generic support for underscan

2018-05-11 Thread Boris Brezillon
Hello,

This is an attempt at providing generic support for underscan connector
props. We already have 3 drivers defining the same underscan, underscan
vborder and underscan hborder properties (amd, radeon and nouveau) and
I am about to add a new one, hence my proposal to put the prop parsing
code in the core and add ->underscan fields to drm_connector_state.

In this v2, I also converted the nouveau driver to the generic
approach. The amdgpu and radeon ones can't be converted since they're
still not converted to the atomic interface.

Regards,

Boris 

Boris Brezillon (4):
  drm/connector: Add generic underscan properties
  drm/vc4: Take underscan setup into account when updating planes
  drm/vc4: Attach underscan props to the HDMI connector
  drm/nouveau: Switch to the generic underscan props

 drivers/gpu/drm/drm_atomic.c|  12 +++
 drivers/gpu/drm/drm_connector.c | 127 
 drivers/gpu/drm/nouveau/nouveau_connector.c |  39 ++---
 drivers/gpu/drm/nouveau/nouveau_connector.h |   9 --
 drivers/gpu/drm/nouveau/nouveau_display.c   |  14 ---
 drivers/gpu/drm/nouveau/nouveau_display.h   |   3 -
 drivers/gpu/drm/nouveau/nv50_display.c  |  17 ++--
 drivers/gpu/drm/vc4/vc4_hdmi.c  |  25 ++
 drivers/gpu/drm/vc4/vc4_plane.c |  49 ++-
 include/drm/drm_connector.h |  80 ++
 10 files changed, 310 insertions(+), 65 deletions(-)

-- 
2.14.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 1/4] drm/connector: Add generic underscan properties

2018-05-11 Thread Boris Brezillon
We have 3 drivers defining the "underscan", "underscan hborder" and
"underscan vborder" properties (radeon, amd and nouveau) and we are
about to add the same kind of thing in VC4.

Define generic underscan props and add new fields to the drm_connector
state so that the property parsing logic can be shared by all DRM
drivers.

A driver can now attach underscan properties to its connector through
the drm_connector_attach_underscan_properties() helper, and can
check/apply the underscan setup based on the
drm_connector_state->underscan fields.

Signed-off-by: Boris Brezillon 
---
Changes in v2:
- Add a new section in the connector props doc to describe underscan
  props
- Fix description of auto mode (auto means apply underscan for HDMI
  monitors only)
- Fix description of vborder/hborder:
right_border = left_border = hborder
top_border = bottom_border = vborder
  not
right_border = left_border = hborder / 2
top_border = bottom_border = vborder / 2
- Rename drm_underscan into drm_underscan_state
---
 drivers/gpu/drm/drm_atomic.c|  12 
 drivers/gpu/drm/drm_connector.c | 127 
 include/drm/drm_connector.h |  80 +
 3 files changed, 219 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index dc850b4b6e21..b7312bd172c9 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1278,6 +1278,12 @@ static int drm_atomic_connector_set_property(struct 
drm_connector *connector,
return -EINVAL;
}
state->content_protection = val;
+   } else if (property == connector->underscan_mode_property) {
+   state->underscan.mode = val;
+   } else if (property == connector->underscan_hborder_property) {
+   state->underscan.hborder = val;
+   } else if (property == connector->underscan_vborder_property) {
+   state->underscan.vborder = val;
} else if (connector->funcs->atomic_set_property) {
return connector->funcs->atomic_set_property(connector,
state, property, val);
@@ -1359,6 +1365,12 @@ drm_atomic_connector_get_property(struct drm_connector 
*connector,
*val = state->scaling_mode;
} else if (property == connector->content_protection_property) {
*val = state->content_protection;
+   } else if (property == connector->underscan_mode_property) {
+   *val = state->underscan.mode;
+   } else if (property == connector->underscan_hborder_property) {
+   *val = state->underscan.hborder;
+   } else if (property == connector->underscan_vborder_property) {
+   *val = state->underscan.vborder;
} else if (connector->funcs->atomic_get_property) {
return connector->funcs->atomic_get_property(connector,
state, property, val);
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 9b9ba5d5ec0c..826af6267a4f 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -914,6 +914,38 @@ DRM_ENUM_NAME_FN(drm_get_content_protection_name, 
drm_cp_enum_list)
  * can also expose this property to external outputs, in which case they
  * must support "None", which should be the default (since external screens
  * have a built-in scaler).
+ *
+ * Connectors for non-analog outputs may also have standardized underscan
+ * properties (drivers can set this up by calling
+ * drm_connector_attach_content_protection_property() on initialization):
+ *
+ * underscan:
+ * This properties defines whether underscan is activated or not, and when
+ * it is activated, how the horizontal and vertical borders are calculated:
+ *
+ * off:
+ * Underscan is disabled. The output image shouldn't be scaled to
+ * take screen borders into account.
+ * on:
+ * Underscan is activated and horizontal and vertical borders are
+ * specified through the "underscan hborder" and
+ * "underscan vborder" properties.
+ * auto:
+ * Underscan is activated only for HDMI monitors.
+ *
+ * underscan hborder:
+ * Horizontal border expressed in pixels. The border is symmetric, which
+ * means you'll have a border of 'hborder' pixels on the left and on the
+ * same border on the right.
+ * When this value is 0 and underscan is "on" or "auto", the driver will
+ * pick a default value (the default value is driver dependent).
+ *
+ * underscan vborder:
+ * Vertical border expressed in pixels. The border is symmetric, which
+ * means you'll have a border of 'vborder' pixels on the top and the same
+ * border on the bottom.
+ * When this value is 0 and underscan is "on" or "auto", the driver will
+ * pick a default value (the 

Re: [PATCH v2 2/4] drm/vc4: Take underscan setup into account when updating planes

2018-05-11 Thread Ville Syrjälä
On Fri, May 11, 2018 at 04:59:17PM +0200, Boris Brezillon wrote:
> Applying an underscan setup is just a matter of scaling all planes
> appropriately and adjusting the CRTC X/Y offset to account for the
> horizontal and vertical border.
> 
> Create an vc4_plane_underscan_adj() function doing that and call it from
> vc4_plane_setup_clipping_and_scaling() so that we are ready to attach
> underscan properties to the HDMI connector.
> 
> Signed-off-by: Boris Brezillon 
> ---
> Changes in v2:
> - Take changes on hborder/vborder meaning into account
> ---
>  drivers/gpu/drm/vc4/vc4_plane.c | 49 
> -
>  1 file changed, 48 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
> index 71d44c357d35..61ed60841cd6 100644
> --- a/drivers/gpu/drm/vc4/vc4_plane.c
> +++ b/drivers/gpu/drm/vc4/vc4_plane.c
> @@ -258,6 +258,49 @@ static u32 vc4_get_scl_field(struct drm_plane_state 
> *state, int plane)
>   }
>  }
>  
> +static int vc4_plane_underscan_adj(struct drm_plane_state *pstate)
> +{
> + struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
> + struct drm_connector_state *conn_state = NULL;
> + struct drm_connector *conn;
> + struct drm_crtc_state *crtc_state;
> + int i;
> +
> + for_each_new_connector_in_state(pstate->state, conn, conn_state, i) {
> + if (conn_state->crtc == pstate->crtc)
> + break;
> + }
> +
> + if (i == pstate->state->num_connector)
> + return 0;
> +
> + if (conn_state->underscan.mode != DRM_UNDERSCAN_ON)
> + return 0;
> +
> + crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
> +pstate->crtc);
> +
> + if (conn_state->underscan.hborder >= crtc_state->mode.hdisplay ||
> + conn_state->underscan.vborder >= crtc_state->mode.vdisplay)
> + return -EINVAL;

border * 2 ?

> +
> + vc4_pstate->crtc_x += conn_state->underscan.hborder;
> + vc4_pstate->crtc_y += conn_state->underscan.vborder;
> + vc4_pstate->crtc_w = (vc4_pstate->crtc_w *
> +   (crtc_state->mode.hdisplay -
> +(conn_state->underscan.hborder * 2))) /
> +  crtc_state->mode.hdisplay;
> + vc4_pstate->crtc_h = (vc4_pstate->crtc_h *
> +   (crtc_state->mode.vdisplay -
> +(conn_state->underscan.vborder * 2))) /
> +  crtc_state->mode.vdisplay;

So you're now scaling all planes? The code seems to reject scaling for
the cursor plane, how are you dealing with that? Or just no cursor
allowed when underscanning?

I'm also wondering if there's any way we can reconcile these border
props with the scaling mode prop, should we ever wish to expose
these props on connectors that already have the scaling mode prop.
Maybe we just have to require the scaling mode to be set to fullscreen 
to allow borders to be specified explictly?

> +
> + if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
> + return -EINVAL;
> +
> + return 0;
> +}
> +
>  static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state 
> *state)
>  {
>   struct drm_plane *plane = state->plane;
> @@ -269,7 +312,7 @@ static int vc4_plane_setup_clipping_and_scaling(struct 
> drm_plane_state *state)
>   int num_planes = fb->format->num_planes;
>   u32 h_subsample = 1;
>   u32 v_subsample = 1;
> - int i;
> + int i, ret;
>  
>   for (i = 0; i < num_planes; i++)
>   vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
> @@ -292,6 +335,10 @@ static int vc4_plane_setup_clipping_and_scaling(struct 
> drm_plane_state *state)
>   vc4_state->crtc_w = state->crtc_w;
>   vc4_state->crtc_h = state->crtc_h;
>  
> + ret = vc4_plane_underscan_adj(state);
> + if (ret)
> + return ret;
> +
>   vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
>  vc4_state->crtc_w);
>   vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
> -- 
> 2.14.1
> 
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Ville Syrjälä
Intel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: add HDP flush dummy for UVD 6/7

2018-05-11 Thread Christian König

Just did so.

Sorry for the delay yesterday was a holiday here and well you know how 
long it takes to catch up on mails :)


Christian.

Am 11.05.2018 um 16:09 schrieb Alex Deucher:

Are you going to push this or should I?

Alex

On Tue, May 8, 2018 at 11:46 AM, Deucher, Alexander
 wrote:

Acked-by: Alex Deucher 


From: amd-gfx  on behalf of Christian
König 
Sent: Tuesday, May 8, 2018 6:30:06 AM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH] drm/amdgpu: add HDP flush dummy for UVD 6/7

The UVD firmware doesn't seem to like the HDP flush here.

This worked for years without HDP flush, so just skip it.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 16 ++--
  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 13 -
  2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 6d3359889c0b..8041b26a7a21 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -963,6 +963,16 @@ static void uvd_v6_0_enc_ring_emit_fence(struct
amdgpu_ring *ring, u64 addr,
  amdgpu_ring_write(ring, HEVC_ENC_CMD_TRAP);
  }

+/**
+ * uvd_v6_0_ring_emit_hdp_flush - skip HDP flushing
+ *
+ * @ring: amdgpu_ring pointer
+ */
+static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+   /* The firmware doesn't seem to like touching registers at this
point. */
+}
+
  /**
   * uvd_v6_0_ring_test_ring - register write test
   *
@@ -1528,12 +1538,13 @@ static const struct amdgpu_ring_funcs
uvd_v6_0_ring_phys_funcs = {
  .set_wptr = uvd_v6_0_ring_set_wptr,
  .parse_cs = amdgpu_uvd_ring_parse_cs,
  .emit_frame_size =
-   6 + 6 + /* hdp flush / invalidate */
+   6 + /* hdp invalidate */
  10 + /* uvd_v6_0_ring_emit_pipeline_sync */
  14, /* uvd_v6_0_ring_emit_fence x1 no user fence */
  .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
  .emit_ib = uvd_v6_0_ring_emit_ib,
  .emit_fence = uvd_v6_0_ring_emit_fence,
+   .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
  .test_ring = uvd_v6_0_ring_test_ring,
  .test_ib = amdgpu_uvd_ring_test_ib,
  .insert_nop = amdgpu_ring_insert_nop,
@@ -1552,7 +1563,7 @@ static const struct amdgpu_ring_funcs
uvd_v6_0_ring_vm_funcs = {
  .get_wptr = uvd_v6_0_ring_get_wptr,
  .set_wptr = uvd_v6_0_ring_set_wptr,
  .emit_frame_size =
-   6 + 6 + /* hdp flush / invalidate */
+   6 + /* hdp invalidate */
  10 + /* uvd_v6_0_ring_emit_pipeline_sync */
  VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /*
uvd_v6_0_ring_emit_vm_flush */
  14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */
@@ -1561,6 +1572,7 @@ static const struct amdgpu_ring_funcs
uvd_v6_0_ring_vm_funcs = {
  .emit_fence = uvd_v6_0_ring_emit_fence,
  .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush,
  .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync,
+   .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
  .test_ring = uvd_v6_0_ring_test_ring,
  .test_ib = amdgpu_uvd_ring_test_ib,
  .insert_nop = amdgpu_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 2251db4048f5..b0de1e04093b 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1135,6 +1135,16 @@ static void uvd_v7_0_enc_ring_emit_fence(struct
amdgpu_ring *ring, u64 addr,
  amdgpu_ring_write(ring, HEVC_ENC_CMD_TRAP);
  }

+/**
+ * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing
+ *
+ * @ring: amdgpu_ring pointer
+ */
+static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+   /* The firmware doesn't seem to like touching registers at this
point. */
+}
+
  /**
   * uvd_v7_0_ring_test_ring - register write test
   *
@@ -1654,7 +1664,7 @@ static const struct amdgpu_ring_funcs
uvd_v7_0_ring_vm_funcs = {
  .get_wptr = uvd_v7_0_ring_get_wptr,
  .set_wptr = uvd_v7_0_ring_set_wptr,
  .emit_frame_size =
-   6 + 6 + /* hdp flush / invalidate */
+   6 + /* hdp invalidate */
  SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
  SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
  8 + /* uvd_v7_0_ring_emit_vm_flush */
@@ -1663,6 +1673,7 @@ static const struct amdgpu_ring_funcs
uvd_v7_0_ring_vm_funcs = {
  .emit_ib = uvd_v7_0_ring_emit_ib,
  .emit_fence = uvd_v7_0_ring_emit_fence,
  .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush,
+   .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush,
  .test_ring = uvd_v7_0_ring_test_ring,
  .test_ib = 

Re: [PATCH v2 2/4] drm/vc4: Take underscan setup into account when updating planes

2018-05-11 Thread Boris Brezillon
On Fri, 11 May 2018 18:34:50 +0300
Ville Syrjälä  wrote:

> On Fri, May 11, 2018 at 04:59:17PM +0200, Boris Brezillon wrote:
> > Applying an underscan setup is just a matter of scaling all planes
> > appropriately and adjusting the CRTC X/Y offset to account for the
> > horizontal and vertical border.
> > 
> > Create an vc4_plane_underscan_adj() function doing that and call it from
> > vc4_plane_setup_clipping_and_scaling() so that we are ready to attach
> > underscan properties to the HDMI connector.
> > 
> > Signed-off-by: Boris Brezillon 
> > ---
> > Changes in v2:
> > - Take changes on hborder/vborder meaning into account
> > ---
> >  drivers/gpu/drm/vc4/vc4_plane.c | 49 
> > -
> >  1 file changed, 48 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/vc4/vc4_plane.c 
> > b/drivers/gpu/drm/vc4/vc4_plane.c
> > index 71d44c357d35..61ed60841cd6 100644
> > --- a/drivers/gpu/drm/vc4/vc4_plane.c
> > +++ b/drivers/gpu/drm/vc4/vc4_plane.c
> > @@ -258,6 +258,49 @@ static u32 vc4_get_scl_field(struct drm_plane_state 
> > *state, int plane)
> > }
> >  }
> >  
> > +static int vc4_plane_underscan_adj(struct drm_plane_state *pstate)
> > +{
> > +   struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
> > +   struct drm_connector_state *conn_state = NULL;
> > +   struct drm_connector *conn;
> > +   struct drm_crtc_state *crtc_state;
> > +   int i;
> > +
> > +   for_each_new_connector_in_state(pstate->state, conn, conn_state, i) {
> > +   if (conn_state->crtc == pstate->crtc)
> > +   break;
> > +   }
> > +
> > +   if (i == pstate->state->num_connector)
> > +   return 0;
> > +
> > +   if (conn_state->underscan.mode != DRM_UNDERSCAN_ON)
> > +   return 0;
> > +
> > +   crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
> > +  pstate->crtc);
> > +
> > +   if (conn_state->underscan.hborder >= crtc_state->mode.hdisplay ||
> > +   conn_state->underscan.vborder >= crtc_state->mode.vdisplay)
> > +   return -EINVAL;  
> 
> border * 2 ?

Oops, indeed. I'll fix that.

> 
> > +
> > +   vc4_pstate->crtc_x += conn_state->underscan.hborder;
> > +   vc4_pstate->crtc_y += conn_state->underscan.vborder;
> > +   vc4_pstate->crtc_w = (vc4_pstate->crtc_w *
> > + (crtc_state->mode.hdisplay -
> > +  (conn_state->underscan.hborder * 2))) /
> > +crtc_state->mode.hdisplay;
> > +   vc4_pstate->crtc_h = (vc4_pstate->crtc_h *
> > + (crtc_state->mode.vdisplay -
> > +  (conn_state->underscan.vborder * 2))) /
> > +crtc_state->mode.vdisplay;  
> 
> So you're now scaling all planes? The code seems to reject scaling for
> the cursor plane, how are you dealing with that? Or just no cursor
> allowed when underscanning?

No, I just didn't test with a cursor plane. We should probably avoid
scaling the cursor plane and just adjust its position. Eric any opinion
on that?

> 
> I'm also wondering if there's any way we can reconcile these border
> props with the scaling mode prop, should we ever wish to expose
> these props on connectors that already have the scaling mode prop.

Nouveau seems to expose both, and I don't see why we couldn't.

> Maybe we just have to require the scaling mode to be set to fullscreen 
> to allow borders to be specified explictly?
> 
> > +
> > +   if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
> > +   return -EINVAL;
> > +
> > +   return 0;
> > +}
> > +
> >  static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state 
> > *state)
> >  {
> > struct drm_plane *plane = state->plane;
> > @@ -269,7 +312,7 @@ static int vc4_plane_setup_clipping_and_scaling(struct 
> > drm_plane_state *state)
> > int num_planes = fb->format->num_planes;
> > u32 h_subsample = 1;
> > u32 v_subsample = 1;
> > -   int i;
> > +   int i, ret;
> >  
> > for (i = 0; i < num_planes; i++)
> > vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
> > @@ -292,6 +335,10 @@ static int vc4_plane_setup_clipping_and_scaling(struct 
> > drm_plane_state *state)
> > vc4_state->crtc_w = state->crtc_w;
> > vc4_state->crtc_h = state->crtc_h;
> >  
> > +   ret = vc4_plane_underscan_adj(state);
> > +   if (ret)
> > +   return ret;
> > +
> > vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
> >vc4_state->crtc_w);
> > vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
> > -- 
> > 2.14.1
> > 
> > ___
> > dri-devel mailing list
> > dri-de...@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel  
> 

___
amd-gfx mailing list

Fix underflow on Hawaii on 18.20 release branch

2018-05-11 Thread mikita.lipski
Rex Zhu's patch fixes an underflow issue observed on Hawaii. The patch is 
in drm-next-amd-staging branch, need an approval to port the patch into 18.20
release branch in order to resolve a ticket SWDEV-152336.

Thanks
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/3] drm/connector: Add generic underscan properties

2018-05-11 Thread Boris Brezillon
On Mon, 7 May 2018 17:25:30 +0200
Daniel Vetter  wrote:

> On Mon, May 07, 2018 at 05:15:33PM +0200, Daniel Vetter wrote:
> > On Mon, May 07, 2018 at 04:44:32PM +0200, Boris Brezillon wrote:  
> > > We have 3 drivers defining the "underscan", "underscan hborder" and
> > > "underscan vborder" properties (radeon, amd and nouveau) and we are
> > > about to add the same kind of thing in VC4.
> > > 
> > > Define generic underscan props and add new fields to the drm_connector
> > > state so that the property parsing logic can be shared by all DRM
> > > drivers.
> > > 
> > > A driver can now attach underscan properties to its connector through
> > > the drm_connector_attach_underscan_properties() helper, and can
> > > check/apply the underscan setup based on the
> > > drm_connector_state->underscan fields.
> > > 
> > > Signed-off-by: Boris Brezillon 
> > > ---
> > >  drivers/gpu/drm/drm_atomic.c|  12 
> > >  drivers/gpu/drm/drm_connector.c | 120 
> > > 
> > >  include/drm/drm_connector.h |  78 ++
> > >  3 files changed, 210 insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
> > > index dc850b4b6e21..b7312bd172c9 100644
> > > --- a/drivers/gpu/drm/drm_atomic.c
> > > +++ b/drivers/gpu/drm/drm_atomic.c
> > > @@ -1278,6 +1278,12 @@ static int 
> > > drm_atomic_connector_set_property(struct drm_connector *connector,
> > >   return -EINVAL;
> > >   }
> > >   state->content_protection = val;
> > > + } else if (property == connector->underscan_mode_property) {
> > > + state->underscan.mode = val;
> > > + } else if (property == connector->underscan_hborder_property) {
> > > + state->underscan.hborder = val;
> > > + } else if (property == connector->underscan_vborder_property) {
> > > + state->underscan.vborder = val;
> > >   } else if (connector->funcs->atomic_set_property) {
> > >   return connector->funcs->atomic_set_property(connector,
> > >   state, property, val);
> > > @@ -1359,6 +1365,12 @@ drm_atomic_connector_get_property(struct 
> > > drm_connector *connector,
> > >   *val = state->scaling_mode;
> > >   } else if (property == connector->content_protection_property) {
> > >   *val = state->content_protection;
> > > + } else if (property == connector->underscan_mode_property) {
> > > + *val = state->underscan.mode;
> > > + } else if (property == connector->underscan_hborder_property) {
> > > + *val = state->underscan.hborder;
> > > + } else if (property == connector->underscan_vborder_property) {
> > > + *val = state->underscan.vborder;
> > >   } else if (connector->funcs->atomic_get_property) {
> > >   return connector->funcs->atomic_get_property(connector,
> > >   state, property, val);
> > > diff --git a/drivers/gpu/drm/drm_connector.c 
> > > b/drivers/gpu/drm/drm_connector.c
> > > index dfc8ca1e9413..9937390b8a25 100644
> > > --- a/drivers/gpu/drm/drm_connector.c
> > > +++ b/drivers/gpu/drm/drm_connector.c
> > > @@ -914,6 +914,31 @@ DRM_ENUM_NAME_FN(drm_get_content_protection_name, 
> > > drm_cp_enum_list)
> > >   *   can also expose this property to external outputs, in which 
> > > case they
> > >   *   must support "None", which should be the default (since 
> > > external screens
> > >   *   have a built-in scaler).  
> > 
> > I think a new section here would be good, to make it more obvious this is
> > a group of properties. Plus a reference to
> > drm_connector_attach_underscan_properties().
> >   
> > > + *
> > > + * underscan:
> > > + *   This properties defines whether underscan is activated or not, 
> > > and when
> > > + *   it is activated, how the horizontal and vertical borders are 
> > > calculated:
> > > + *
> > > + *   off:
> > > + *   Underscan is disabled. The output image shouldn't be 
> > > scaled to
> > > + *   take screen borders into account.
> > > + *   on:
> > > + *   Underscan is activated and horizontal and vertical 
> > > borders are
> > > + *   specified through the "underscan hborder" and
> > > + *   "underscan vborder" properties.
> > > + *   auto:
> > > + *   Underscan is activated and horizontal and vertical 
> > > borders are
> > > + *   automatically chosen by the driver.
> > > + *
> > > + * underscan hborder:
> > > + *   Horizontal border expressed in pixels. The border is symmetric, 
> > > which
> > > + *   means you'll have half of this value placed on the left and the 
> > > other
> > > + *   half on the right.
> > > + *
> > > + * underscan vborder:
> > > + *   Vertical border expressed in pixels. The border is symmetric, 
> > > which
> > > + *   means you'll have half of this value placed on the top and the 
> 

Re: [PATCH 1/3] drm/connector: Add generic underscan properties

2018-05-11 Thread Boris Brezillon
On Tue, 8 May 2018 10:18:10 +1000
Ben Skeggs  wrote:

> On 8 May 2018 at 04:26, Harry Wentland  wrote:
> >
> >
> > On 2018-05-07 12:19 PM, Boris Brezillon wrote:  
> >> On Mon, 7 May 2018 18:01:44 +0300
> >> Ville Syrjälä  wrote:
> >>  
> >>> On Mon, May 07, 2018 at 04:44:32PM +0200, Boris Brezillon wrote:  
>  We have 3 drivers defining the "underscan", "underscan hborder" and
>  "underscan vborder" properties (radeon, amd and nouveau) and we are
>  about to add the same kind of thing in VC4.
> 
>  Define generic underscan props and add new fields to the drm_connector
>  state so that the property parsing logic can be shared by all DRM
>  drivers.
> 
>  A driver can now attach underscan properties to its connector through
>  the drm_connector_attach_underscan_properties() helper, and can
>  check/apply the underscan setup based on the
>  drm_connector_state->underscan fields.
> 
>  Signed-off-by: Boris Brezillon 
>  ---
>   drivers/gpu/drm/drm_atomic.c|  12 
>   drivers/gpu/drm/drm_connector.c | 120 
>  
>   include/drm/drm_connector.h |  78 ++
>   3 files changed, 210 insertions(+)
> 
>  diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
>  index dc850b4b6e21..b7312bd172c9 100644
>  --- a/drivers/gpu/drm/drm_atomic.c
>  +++ b/drivers/gpu/drm/drm_atomic.c
>  @@ -1278,6 +1278,12 @@ static int 
>  drm_atomic_connector_set_property(struct drm_connector *connector,
>  return -EINVAL;
>  }
>  state->content_protection = val;
>  +   } else if (property == connector->underscan_mode_property) {
>  +   state->underscan.mode = val;
>  +   } else if (property == connector->underscan_hborder_property) {
>  +   state->underscan.hborder = val;
>  +   } else if (property == connector->underscan_vborder_property) {
>  +   state->underscan.vborder = val;
>  } else if (connector->funcs->atomic_set_property) {
>  return connector->funcs->atomic_set_property(connector,
>  state, property, val);
>  @@ -1359,6 +1365,12 @@ drm_atomic_connector_get_property(struct 
>  drm_connector *connector,
>  *val = state->scaling_mode;
>  } else if (property == connector->content_protection_property) {
>  *val = state->content_protection;
>  +   } else if (property == connector->underscan_mode_property) {
>  +   *val = state->underscan.mode;
>  +   } else if (property == connector->underscan_hborder_property) {
>  +   *val = state->underscan.hborder;
>  +   } else if (property == connector->underscan_vborder_property) {
>  +   *val = state->underscan.vborder;
>  } else if (connector->funcs->atomic_get_property) {
>  return connector->funcs->atomic_get_property(connector,
>  state, property, val);
>  diff --git a/drivers/gpu/drm/drm_connector.c 
>  b/drivers/gpu/drm/drm_connector.c
>  index dfc8ca1e9413..9937390b8a25 100644
>  --- a/drivers/gpu/drm/drm_connector.c
>  +++ b/drivers/gpu/drm/drm_connector.c
>  @@ -914,6 +914,31 @@ DRM_ENUM_NAME_FN(drm_get_content_protection_name, 
>  drm_cp_enum_list)
>    * can also expose this property to external outputs, in which case they
>    * must support "None", which should be the default (since external 
>  screens
>    * have a built-in scaler).
>  + *
>  + * underscan:
>  + * This properties defines whether underscan is activated or not, and 
>  when
>  + * it is activated, how the horizontal and vertical borders are 
>  calculated:
>  + *
>  + * off:
>  + * Underscan is disabled. The output image shouldn't be scaled 
>  to
>  + * take screen borders into account.  
> >>>  
>  + * on:
>  + * Underscan is activated and horizontal and vertical borders 
>  are
>  + * specified through the "underscan hborder" and
>  + * "underscan vborder" properties.  
> >>>
> >>> How is the output scaled?  
> >>
> >> In HW. The formula is
> >>
> >>   hfactor = (hdisplay - hborder) / hdisplay
> >>   vfactor = (vdisplay - vborder) / vdisplay
> >>  
> >>> What does the user mode hdisplay/vdisplay mean
> >>> in this case?  
> >>
> >> The same as before this patch: the output resolution. You just add
> >> black margins.
> >>  
> >>> What if I want underscan without scaling?  
> >>
> >> Then don't involve the DRM driver and do that from userspace: just
> >> fill the visible portion of the framebuffer and leave the rest black.
> >> There nothing the DRM driver 

Re: [PATCH] drm/amdgpu: add HDP flush dummy for UVD 6/7

2018-05-11 Thread Alex Deucher
Are you going to push this or should I?

Alex

On Tue, May 8, 2018 at 11:46 AM, Deucher, Alexander
 wrote:
> Acked-by: Alex Deucher 
>
> 
> From: amd-gfx  on behalf of Christian
> König 
> Sent: Tuesday, May 8, 2018 6:30:06 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH] drm/amdgpu: add HDP flush dummy for UVD 6/7
>
> The UVD firmware doesn't seem to like the HDP flush here.
>
> This worked for years without HDP flush, so just skip it.
>
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 16 ++--
>  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 13 -
>  2 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index 6d3359889c0b..8041b26a7a21 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -963,6 +963,16 @@ static void uvd_v6_0_enc_ring_emit_fence(struct
> amdgpu_ring *ring, u64 addr,
>  amdgpu_ring_write(ring, HEVC_ENC_CMD_TRAP);
>  }
>
> +/**
> + * uvd_v6_0_ring_emit_hdp_flush - skip HDP flushing
> + *
> + * @ring: amdgpu_ring pointer
> + */
> +static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
> +{
> +   /* The firmware doesn't seem to like touching registers at this
> point. */
> +}
> +
>  /**
>   * uvd_v6_0_ring_test_ring - register write test
>   *
> @@ -1528,12 +1538,13 @@ static const struct amdgpu_ring_funcs
> uvd_v6_0_ring_phys_funcs = {
>  .set_wptr = uvd_v6_0_ring_set_wptr,
>  .parse_cs = amdgpu_uvd_ring_parse_cs,
>  .emit_frame_size =
> -   6 + 6 + /* hdp flush / invalidate */
> +   6 + /* hdp invalidate */
>  10 + /* uvd_v6_0_ring_emit_pipeline_sync */
>  14, /* uvd_v6_0_ring_emit_fence x1 no user fence */
>  .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
>  .emit_ib = uvd_v6_0_ring_emit_ib,
>  .emit_fence = uvd_v6_0_ring_emit_fence,
> +   .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
>  .test_ring = uvd_v6_0_ring_test_ring,
>  .test_ib = amdgpu_uvd_ring_test_ib,
>  .insert_nop = amdgpu_ring_insert_nop,
> @@ -1552,7 +1563,7 @@ static const struct amdgpu_ring_funcs
> uvd_v6_0_ring_vm_funcs = {
>  .get_wptr = uvd_v6_0_ring_get_wptr,
>  .set_wptr = uvd_v6_0_ring_set_wptr,
>  .emit_frame_size =
> -   6 + 6 + /* hdp flush / invalidate */
> +   6 + /* hdp invalidate */
>  10 + /* uvd_v6_0_ring_emit_pipeline_sync */
>  VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /*
> uvd_v6_0_ring_emit_vm_flush */
>  14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */
> @@ -1561,6 +1572,7 @@ static const struct amdgpu_ring_funcs
> uvd_v6_0_ring_vm_funcs = {
>  .emit_fence = uvd_v6_0_ring_emit_fence,
>  .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush,
>  .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync,
> +   .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
>  .test_ring = uvd_v6_0_ring_test_ring,
>  .test_ib = amdgpu_uvd_ring_test_ib,
>  .insert_nop = amdgpu_ring_insert_nop,
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index 2251db4048f5..b0de1e04093b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -1135,6 +1135,16 @@ static void uvd_v7_0_enc_ring_emit_fence(struct
> amdgpu_ring *ring, u64 addr,
>  amdgpu_ring_write(ring, HEVC_ENC_CMD_TRAP);
>  }
>
> +/**
> + * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing
> + *
> + * @ring: amdgpu_ring pointer
> + */
> +static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
> +{
> +   /* The firmware doesn't seem to like touching registers at this
> point. */
> +}
> +
>  /**
>   * uvd_v7_0_ring_test_ring - register write test
>   *
> @@ -1654,7 +1664,7 @@ static const struct amdgpu_ring_funcs
> uvd_v7_0_ring_vm_funcs = {
>  .get_wptr = uvd_v7_0_ring_get_wptr,
>  .set_wptr = uvd_v7_0_ring_set_wptr,
>  .emit_frame_size =
> -   6 + 6 + /* hdp flush / invalidate */
> +   6 + /* hdp invalidate */
>  SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
>  SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
>  8 + /* uvd_v7_0_ring_emit_vm_flush */
> @@ -1663,6 +1673,7 @@ static const struct amdgpu_ring_funcs
> uvd_v7_0_ring_vm_funcs = {
>  .emit_ib = uvd_v7_0_ring_emit_ib,
>  .emit_fence = uvd_v7_0_ring_emit_fence,
>  .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush,
> +   .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush,
>  .test_ring = uvd_v7_0_ring_test_ring,
>  .test_ib 

Re: [PATCH] drm/amdgpu/gmc9: remove unused register defs

2018-05-11 Thread Christian König

Am 10.05.2018 um 22:21 schrieb Alex Deucher:

These got moved to the new df module so no longer
used in this file.

Signed-off-by: Alex Deucher 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 --
  1 file changed, 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6c9f7f999532..6cccf0e0acd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -43,20 +43,6 @@
  #include "gfxhub_v1_0.h"
  #include "mmhub_v1_0.h"
  
-#define mmDF_CS_AON0_DramBaseAddress0  0x0044

-#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 
0
-//DF_CS_AON0_DramBaseAddress0
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 
   0x0
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 
   0x1
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT   
   0x4
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT   
   0x8
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT   
   0xc
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK   
   0x0001L
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK   
   0x0002L
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 
   0x00F0L
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 
   0x0700L
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 
   0xF000L
-
  /* add these here since we already include dce12 headers and these are for 
DCN */
  #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 
 0x055d
  #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX
 2


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/6] drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads

2018-05-11 Thread Alex Deucher
On Fri, May 11, 2018 at 3:36 AM, Oded Gabbay  wrote:
> Hi Felix,
> The patch looks fine to me and I can ACK it, but I would prefer that
> Alex or Christian will review it as well before pushing it.

Not my area of expertise, but it looks correct to me.
Acked-by: Alex Deucher 

> Thanks,
> Oded
>
> On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling  
> wrote:
>> This commit allows amdgpu_ttm_tt_get_user_pages to work in a worker
>> thread rather than regular process context. This will be used when
>> KFD userptr BOs are restored after an MMU-notifier eviction.
>>
>> v2: Manage task reference with get_task_struct/put_task_struct
>>
>> Signed-off-by: Felix Kuehling 
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 
>> +
>>  1 file changed, 29 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c2fae04..25490fe 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -689,7 +689,7 @@ struct amdgpu_ttm_tt {
>> struct ttm_dma_tt   ttm;
>> u64 offset;
>> uint64_tuserptr;
>> -   struct mm_struct*usermm;
>> +   struct task_struct  *usertask;
>> uint32_tuserflags;
>> spinlock_t  guptasklock;
>> struct list_headguptasks;
>> @@ -700,14 +700,18 @@ struct amdgpu_ttm_tt {
>>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>  {
>> struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +   struct mm_struct *mm = gtt->usertask->mm;
>> unsigned int flags = 0;
>> unsigned pinned = 0;
>> int r;
>>
>> +   if (!mm) /* Happens during process shutdown */
>> +   return -ESRCH;
>> +
>> if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
>> flags |= FOLL_WRITE;
>>
>> -   down_read(>mm->mmap_sem);
>> +   down_read(>mmap_sem);
>>
>> if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
>> /* check that we only use anonymous memory
>> @@ -715,9 +719,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
>> struct page **pages)
>> unsigned long end = gtt->userptr + ttm->num_pages * 
>> PAGE_SIZE;
>> struct vm_area_struct *vma;
>>
>> -   vma = find_vma(gtt->usermm, gtt->userptr);
>> +   vma = find_vma(mm, gtt->userptr);
>> if (!vma || vma->vm_file || vma->vm_end < end) {
>> -   up_read(>mm->mmap_sem);
>> +   up_read(>mmap_sem);
>> return -EPERM;
>> }
>> }
>> @@ -733,7 +737,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
>> struct page **pages)
>> list_add(, >guptasks);
>> spin_unlock(>guptasklock);
>>
>> -   r = get_user_pages(userptr, num_pages, flags, p, NULL);
>> +   if (mm == current->mm)
>> +   r = get_user_pages(userptr, num_pages, flags, p, 
>> NULL);
>> +   else
>> +   r = get_user_pages_remote(gtt->usertask,
>> +   mm, userptr, num_pages,
>> +   flags, p, NULL, NULL);
>>
>> spin_lock(>guptasklock);
>> list_del();
>> @@ -746,12 +755,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
>> struct page **pages)
>>
>> } while (pinned < ttm->num_pages);
>>
>> -   up_read(>mm->mmap_sem);
>> +   up_read(>mmap_sem);
>> return 0;
>>
>>  release_pages:
>> release_pages(pages, pinned);
>> -   up_read(>mm->mmap_sem);
>> +   up_read(>mmap_sem);
>> return r;
>>  }
>>
>> @@ -972,6 +981,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt 
>> *ttm)
>>  {
>> struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>
>> +   if (gtt->usertask)
>> +   put_task_struct(gtt->usertask);
>> +
>> ttm_dma_tt_fini(>ttm);
>> kfree(gtt);
>>  }
>> @@ -1072,8 +1084,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, 
>> uint64_t addr,
>> return -EINVAL;
>>
>> gtt->userptr = addr;
>> -   gtt->usermm = current->mm;
>> gtt->userflags = flags;
>> +
>> +   if (gtt->usertask)
>> +   put_task_struct(gtt->usertask);
>> +   gtt->usertask = current->group_leader;
>> +   get_task_struct(gtt->usertask);
>> +
>> spin_lock_init(>guptasklock);
>> INIT_LIST_HEAD(>guptasks);
>> atomic_set(>mmu_invalidations, 0);
>> @@ -1089,7 +1106,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct 
>> ttm_tt *ttm)
>> if (gtt == NULL)
>> return NULL;
>>
>> -   return 

Re: [PATCH 11/21] drm/amdkfd: Add GFXv9 MQD manager

2018-05-11 Thread Felix Kuehling
This patch series was meant to be applied after the userptr changes. I
haven't tested this without the userptr changes.

I think your main concern about userptr is the use of GFP_NOIO. I
remember considering memalloc_noio_save/restore when I worked on this
over a year ago. I found an old email thread I had with Christian about
this (subject: MMU notifier deadlock on kernel 4.9):

> memalloc_noio_save doesn't affect kmalloc directly. It sets
> current->flags, which is used deep inside the page allocator, after
> lockdep_trace_alloc(gfp) flags a lock as being used with IO enabled. The
> slob allocator looks at gfp_allowed_mask, but I'm not sure how safe it
> is to change that in a driver and it doesn't seem to be an exported
> symbol anyway.
Maybe this has changed in the mean time, but a year ago, using
memalloc_noio_save/restore may have prevented real deadlocks, but would
not shut up the lockdep warnings. Maybe this has changed in the mean
time. FWIW, I don't find lockdep_trace_alloc in the current kernel.

Regards,
  Felix


On 2018-05-11 05:10 AM, Oded Gabbay wrote:
> On Wed, Apr 11, 2018 at 12:33 AM, Felix Kuehling  
> wrote:
>> Signed-off-by: John Bridgman 
>> Signed-off-by: Jay Cornwall 
>> Signed-off-by: Felix Kuehling 
>> ---
>>  drivers/gpu/drm/amd/amdkfd/Makefile |   1 +
>>  drivers/gpu/drm/amd/amdkfd/kfd_device.c |   2 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c|   3 +
>>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 443 
>> 
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   3 +
>>  5 files changed, 451 insertions(+), 1 deletion(-)
>>  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
>> b/drivers/gpu/drm/amd/amdkfd/Makefile
>> index 52b3c1b..094b591 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
>> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
>> @@ -30,6 +30,7 @@ amdkfd-y  := kfd_module.o kfd_device.o kfd_chardev.o 
>> kfd_topology.o \
>> kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
>> kfd_process.o kfd_queue.o kfd_mqd_manager.o \
>> kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
>> +   kfd_mqd_manager_v9.o \
>> kfd_kernel_queue.o kfd_kernel_queue_cik.o \
>> kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
>> kfd_packet_manager.o kfd_process_queue_manager.o \
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index f563acb..c368ce3 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -700,7 +700,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned 
>> int size,
>> if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
>> return -ENOMEM;
>>
>> -   *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
>> +   *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
> This assumes the patch in the userptr patch-set is applied. I changed
> it to GFP_KERNEL for now.
>
>> if ((*mem_obj) == NULL)
>> return -ENOMEM;
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>> index ee7061e..4b8eb50 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>> @@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE 
>> type,
>> case CHIP_POLARIS10:
>> case CHIP_POLARIS11:
>> return mqd_manager_init_vi_tonga(type, dev);
>> +   case CHIP_VEGA10:
>> +   case CHIP_RAVEN:
>> +   return mqd_manager_init_v9(type, dev);
>> default:
>> WARN(1, "Unexpected ASIC family %u",
>>  dev->device_info->asic_family);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> new file mode 100644
>> index 000..684054f
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> @@ -0,0 +1,443 @@
>> +/*
>> + * Copyright 2016-2018 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included 
>> in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE 

Re: [PATCH 07/21] drm/amdkfd: Clean up KFD_MMAP_ offset handling

2018-05-11 Thread Oded Gabbay
On Fri, May 11, 2018 at 6:57 PM, Felix Kuehling  wrote:
> On 2018-05-11 04:52 AM, Oded Gabbay wrote:
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -41,9 +41,33 @@
>>>
>>>  #define KFD_SYSFS_FILE_MODE 0444
>>>
>>> -#define KFD_MMAP_DOORBELL_MASK 0x8ull
>>> -#define KFD_MMAP_EVENTS_MASK 0x4ull
>>> -#define KFD_MMAP_RESERVED_MEM_MASK 0x2ull
>>> +/* GPU ID hash width in bits */
>>> +#define KFD_GPU_ID_HASH_WIDTH 16
>>> +
>>> +/* Use upper bits of mmap offset to store KFD driver specific information.
>>> + * BITS[63:62] - Encode MMAP type
>>> + * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset 
>>> belongs to
>>> + * BITS[45:0]  - MMAP offset value
>>> + *
>>> + * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
>>> + *  defines are w.r.t to PAGE_SIZE
>>> + */
>>> +#define KFD_MMAP_TYPE_SHIFT(62 - PAGE_SHIFT)
>>> +#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
>>> +#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
>>> +#define KFD_MMAP_TYPE_EVENTS   (0x2ULL << KFD_MMAP_TYPE_SHIFT)
>>> +#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
>> Isn't this new definition breaks existing user-space library (kfd thunk) ?
>> If that is the case we have a problem here.
>
> No, this does not break user mode, because user mode isn't aware of
> these definitions at all. The mmap offset comes from kernel mode and is
> opaque to user mode. User mode just passes it back down to kernel mode
> in the offset parameter of mmap. So the kernel can change the encoding
> of the mmap offset without breaking user mode.
>
Oops, you are correct of course, I forgot. That's what happens after 4
years of not seeing that particular code :)

Oded

> Regards,
>   Felix
>
>>
>> Oded
>>
>>
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 11/21] drm/amdkfd: Add GFXv9 MQD manager

2018-05-11 Thread Oded Gabbay
On Fri, May 11, 2018 at 9:15 PM, Felix Kuehling  wrote:
> This patch series was meant to be applied after the userptr changes. I
> haven't tested this without the userptr changes.
>
> I think your main concern about userptr is the use of GFP_NOIO. I
> remember considering memalloc_noio_save/restore when I worked on this
> over a year ago. I found an old email thread I had with Christian about
> this (subject: MMU notifier deadlock on kernel 4.9):
>
>> memalloc_noio_save doesn't affect kmalloc directly. It sets
>> current->flags, which is used deep inside the page allocator, after
>> lockdep_trace_alloc(gfp) flags a lock as being used with IO enabled. The
>> slob allocator looks at gfp_allowed_mask, but I'm not sure how safe it
>> is to change that in a driver and it doesn't seem to be an exported
>> symbol anyway.
> Maybe this has changed in the mean time, but a year ago, using
> memalloc_noio_save/restore may have prevented real deadlocks, but would
> not shut up the lockdep warnings. Maybe this has changed in the mean
> time. FWIW, I don't find lockdep_trace_alloc in the current kernel.
>
> Regards,
>   Felix

I'm not familiar with this API, but from reading about it, it seems a
more robust solution then to change the GFP flags directly in each
kmalloc from the reasons I mentioned in the original email I sent.
Having said that, if no one else objects and we say we will look at
moving to that API in the future, I don't object to taking your
patch-set as is now.

Oded

>
>
> On 2018-05-11 05:10 AM, Oded Gabbay wrote:
>> On Wed, Apr 11, 2018 at 12:33 AM, Felix Kuehling  
>> wrote:
>>> Signed-off-by: John Bridgman 
>>> Signed-off-by: Jay Cornwall 
>>> Signed-off-by: Felix Kuehling 
>>> ---
>>>  drivers/gpu/drm/amd/amdkfd/Makefile |   1 +
>>>  drivers/gpu/drm/amd/amdkfd/kfd_device.c |   2 +-
>>>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c|   3 +
>>>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 443 
>>> 
>>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   3 +
>>>  5 files changed, 451 insertions(+), 1 deletion(-)
>>>  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
>>> b/drivers/gpu/drm/amd/amdkfd/Makefile
>>> index 52b3c1b..094b591 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
>>> @@ -30,6 +30,7 @@ amdkfd-y  := kfd_module.o kfd_device.o kfd_chardev.o 
>>> kfd_topology.o \
>>> kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
>>> kfd_process.o kfd_queue.o kfd_mqd_manager.o \
>>> kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
>>> +   kfd_mqd_manager_v9.o \
>>> kfd_kernel_queue.o kfd_kernel_queue_cik.o \
>>> kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
>>> kfd_packet_manager.o kfd_process_queue_manager.o \
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>> index f563acb..c368ce3 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>> @@ -700,7 +700,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned 
>>> int size,
>>> if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
>>> return -ENOMEM;
>>>
>>> -   *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
>>> +   *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
>> This assumes the patch in the userptr patch-set is applied. I changed
>> it to GFP_KERNEL for now.
>>
>>> if ((*mem_obj) == NULL)
>>> return -ENOMEM;
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>>> index ee7061e..4b8eb50 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>>> @@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE 
>>> type,
>>> case CHIP_POLARIS10:
>>> case CHIP_POLARIS11:
>>> return mqd_manager_init_vi_tonga(type, dev);
>>> +   case CHIP_VEGA10:
>>> +   case CHIP_RAVEN:
>>> +   return mqd_manager_init_v9(type, dev);
>>> default:
>>> WARN(1, "Unexpected ASIC family %u",
>>>  dev->device_info->asic_family);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>>> new file mode 100644
>>> index 000..684054f
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>>> @@ -0,0 +1,443 @@
>>> +/*
>>> + * Copyright 2016-2018 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person obtaining a
>>> + * copy 

Re: [PATCH v2 2/4] drm/vc4: Take underscan setup into account when updating planes

2018-05-11 Thread Ville Syrjälä
On Fri, May 11, 2018 at 05:52:56PM +0200, Boris Brezillon wrote:
> On Fri, 11 May 2018 18:34:50 +0300
> Ville Syrjälä  wrote:
> 
> > On Fri, May 11, 2018 at 04:59:17PM +0200, Boris Brezillon wrote:
> > > Applying an underscan setup is just a matter of scaling all planes
> > > appropriately and adjusting the CRTC X/Y offset to account for the
> > > horizontal and vertical border.
> > > 
> > > Create an vc4_plane_underscan_adj() function doing that and call it from
> > > vc4_plane_setup_clipping_and_scaling() so that we are ready to attach
> > > underscan properties to the HDMI connector.
> > > 
> > > Signed-off-by: Boris Brezillon 
> > > ---
> > > Changes in v2:
> > > - Take changes on hborder/vborder meaning into account
> > > ---
> > >  drivers/gpu/drm/vc4/vc4_plane.c | 49 
> > > -
> > >  1 file changed, 48 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/gpu/drm/vc4/vc4_plane.c 
> > > b/drivers/gpu/drm/vc4/vc4_plane.c
> > > index 71d44c357d35..61ed60841cd6 100644
> > > --- a/drivers/gpu/drm/vc4/vc4_plane.c
> > > +++ b/drivers/gpu/drm/vc4/vc4_plane.c
> > > @@ -258,6 +258,49 @@ static u32 vc4_get_scl_field(struct drm_plane_state 
> > > *state, int plane)
> > >   }
> > >  }
> > >  
> > > +static int vc4_plane_underscan_adj(struct drm_plane_state *pstate)
> > > +{
> > > + struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
> > > + struct drm_connector_state *conn_state = NULL;
> > > + struct drm_connector *conn;
> > > + struct drm_crtc_state *crtc_state;
> > > + int i;
> > > +
> > > + for_each_new_connector_in_state(pstate->state, conn, conn_state, i) {
> > > + if (conn_state->crtc == pstate->crtc)
> > > + break;
> > > + }
> > > +
> > > + if (i == pstate->state->num_connector)
> > > + return 0;
> > > +
> > > + if (conn_state->underscan.mode != DRM_UNDERSCAN_ON)
> > > + return 0;
> > > +
> > > + crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
> > > +pstate->crtc);
> > > +
> > > + if (conn_state->underscan.hborder >= crtc_state->mode.hdisplay ||
> > > + conn_state->underscan.vborder >= crtc_state->mode.vdisplay)
> > > + return -EINVAL;  
> > 
> > border * 2 ?
> 
> Oops, indeed. I'll fix that.
> 
> > 
> > > +
> > > + vc4_pstate->crtc_x += conn_state->underscan.hborder;
> > > + vc4_pstate->crtc_y += conn_state->underscan.vborder;
> > > + vc4_pstate->crtc_w = (vc4_pstate->crtc_w *
> > > +   (crtc_state->mode.hdisplay -
> > > +(conn_state->underscan.hborder * 2))) /
> > > +  crtc_state->mode.hdisplay;
> > > + vc4_pstate->crtc_h = (vc4_pstate->crtc_h *
> > > +   (crtc_state->mode.vdisplay -
> > > +(conn_state->underscan.vborder * 2))) /
> > > +  crtc_state->mode.vdisplay;  
> > 
> > So you're now scaling all planes? The code seems to reject scaling for
> > the cursor plane, how are you dealing with that? Or just no cursor
> > allowed when underscanning?
> 
> No, I just didn't test with a cursor plane. We should probably avoid
> scaling the cursor plane and just adjust its position. Eric any opinion
> on that?

I don't think you can just not scale it. The user asked for the cursor
to be at a specific place with a specific size. Can't just ignore
that and do something else. Also eg. i915 would definitely scale the
cursor since we'd just scale the entire crtc instead of scaling
individual planes. Different drivers doing different things wouldn't
be good.

> 
> > 
> > I'm also wondering if there's any way we can reconcile these border
> > props with the scaling mode prop, should we ever wish to expose
> > these props on connectors that already have the scaling mode prop.
> 
> Nouveau seems to expose both, and I don't see why we couldn't.

First of all the interaction of these borders with panels that have
a fixed mode would need to be properly specified. Do we apply the
borders against the user mode or the panel's native mode? I guess
the latter would be a bit easier (would also match how the TV margins
behave I suppose). But that does leave open the question of how
would generic userspace know which case it's dealing with? Eg. if it
wants to underscan by some specific percentage it has to calculate
the borders based on the correct mode, otherwise the results aren't
going to match the expectations.

> 
> > Maybe we just have to require the scaling mode to be set to fullscreen 
> > to allow borders to be specified explictly?
> > 
> > > +
> > > + if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
> > > + return -EINVAL;
> > > +
> > > + return 0;
> > > +}
> > > +
> > >  static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state 
> > > *state)
> > >  {
> > >   struct drm_plane *plane = state->plane;
> > > @@ -269,7 +312,7 @@ static int 
> > > 

Re: [PATCH v2 2/4] drm/vc4: Take underscan setup into account when updating planes

2018-05-11 Thread Ville Syrjälä
On Fri, May 11, 2018 at 07:12:21PM +0200, Boris Brezillon wrote:
> On Fri, 11 May 2018 19:54:02 +0300
> Ville Syrjälä  wrote:
> 
> > On Fri, May 11, 2018 at 05:52:56PM +0200, Boris Brezillon wrote:
> > > On Fri, 11 May 2018 18:34:50 +0300
> > > Ville Syrjälä  wrote:
> > >   
> > > > On Fri, May 11, 2018 at 04:59:17PM +0200, Boris Brezillon wrote:  
> > > > > Applying an underscan setup is just a matter of scaling all planes
> > > > > appropriately and adjusting the CRTC X/Y offset to account for the
> > > > > horizontal and vertical border.
> > > > > 
> > > > > Create an vc4_plane_underscan_adj() function doing that and call it 
> > > > > from
> > > > > vc4_plane_setup_clipping_and_scaling() so that we are ready to attach
> > > > > underscan properties to the HDMI connector.
> > > > > 
> > > > > Signed-off-by: Boris Brezillon 
> > > > > ---
> > > > > Changes in v2:
> > > > > - Take changes on hborder/vborder meaning into account
> > > > > ---
> > > > >  drivers/gpu/drm/vc4/vc4_plane.c | 49 
> > > > > -
> > > > >  1 file changed, 48 insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/vc4/vc4_plane.c 
> > > > > b/drivers/gpu/drm/vc4/vc4_plane.c
> > > > > index 71d44c357d35..61ed60841cd6 100644
> > > > > --- a/drivers/gpu/drm/vc4/vc4_plane.c
> > > > > +++ b/drivers/gpu/drm/vc4/vc4_plane.c
> > > > > @@ -258,6 +258,49 @@ static u32 vc4_get_scl_field(struct 
> > > > > drm_plane_state *state, int plane)
> > > > >   }
> > > > >  }
> > > > >  
> > > > > +static int vc4_plane_underscan_adj(struct drm_plane_state *pstate)
> > > > > +{
> > > > > + struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
> > > > > + struct drm_connector_state *conn_state = NULL;
> > > > > + struct drm_connector *conn;
> > > > > + struct drm_crtc_state *crtc_state;
> > > > > + int i;
> > > > > +
> > > > > + for_each_new_connector_in_state(pstate->state, conn, 
> > > > > conn_state, i) {
> > > > > + if (conn_state->crtc == pstate->crtc)
> > > > > + break;
> > > > > + }
> > > > > +
> > > > > + if (i == pstate->state->num_connector)
> > > > > + return 0;
> > > > > +
> > > > > + if (conn_state->underscan.mode != DRM_UNDERSCAN_ON)
> > > > > + return 0;
> > > > > +
> > > > > + crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
> > > > > +pstate->crtc);
> > > > > +
> > > > > + if (conn_state->underscan.hborder >= crtc_state->mode.hdisplay 
> > > > > ||
> > > > > + conn_state->underscan.vborder >= crtc_state->mode.vdisplay)
> > > > > + return -EINVAL;
> > > > 
> > > > border * 2 ?  
> > > 
> > > Oops, indeed. I'll fix that.
> > >   
> > > >   
> > > > > +
> > > > > + vc4_pstate->crtc_x += conn_state->underscan.hborder;
> > > > > + vc4_pstate->crtc_y += conn_state->underscan.vborder;
> > > > > + vc4_pstate->crtc_w = (vc4_pstate->crtc_w *
> > > > > +   (crtc_state->mode.hdisplay -
> > > > > +(conn_state->underscan.hborder * 2))) /
> > > > > +  crtc_state->mode.hdisplay;
> > > > > + vc4_pstate->crtc_h = (vc4_pstate->crtc_h *
> > > > > +   (crtc_state->mode.vdisplay -
> > > > > +(conn_state->underscan.vborder * 2))) /
> > > > > +  crtc_state->mode.vdisplay;
> > > > 
> > > > So you're now scaling all planes? The code seems to reject scaling for
> > > > the cursor plane, how are you dealing with that? Or just no cursor
> > > > allowed when underscanning?  
> > > 
> > > No, I just didn't test with a cursor plane. We should probably avoid
> > > scaling the cursor plane and just adjust its position. Eric any opinion
> > > on that?  
> > 
> > I don't think you can just not scale it. The user asked for the cursor
> > to be at a specific place with a specific size. Can't just ignore
> > that and do something else. Also eg. i915 would definitely scale the
> > cursor since we'd just scale the entire crtc instead of scaling
> > individual planes. Different drivers doing different things wouldn't
> > be good.
> 
> Except in our case the scaling takes place before the composition, so
> we don't have a choice.

The choice is to either do what userspace asked, or return an error.

> 
> > 
> > >   
> > > > 
> > > > I'm also wondering if there's any way we can reconcile these border
> > > > props with the scaling mode prop, should we ever wish to expose
> > > > these props on connectors that already have the scaling mode prop.  
> > > 
> > > Nouveau seems to expose both, and I don't see why we couldn't.  
> > 
> > First of all the interaction of these borders with panels that have
> > a fixed mode would need to be properly specified. Do we apply the
> > 

Re: [PATCH v2 2/4] drm/vc4: Take underscan setup into account when updating planes

2018-05-11 Thread Boris Brezillon
On Fri, 11 May 2018 19:54:02 +0300
Ville Syrjälä  wrote:

> On Fri, May 11, 2018 at 05:52:56PM +0200, Boris Brezillon wrote:
> > On Fri, 11 May 2018 18:34:50 +0300
> > Ville Syrjälä  wrote:
> >   
> > > On Fri, May 11, 2018 at 04:59:17PM +0200, Boris Brezillon wrote:  
> > > > Applying an underscan setup is just a matter of scaling all planes
> > > > appropriately and adjusting the CRTC X/Y offset to account for the
> > > > horizontal and vertical border.
> > > > 
> > > > Create an vc4_plane_underscan_adj() function doing that and call it from
> > > > vc4_plane_setup_clipping_and_scaling() so that we are ready to attach
> > > > underscan properties to the HDMI connector.
> > > > 
> > > > Signed-off-by: Boris Brezillon 
> > > > ---
> > > > Changes in v2:
> > > > - Take changes on hborder/vborder meaning into account
> > > > ---
> > > >  drivers/gpu/drm/vc4/vc4_plane.c | 49 
> > > > -
> > > >  1 file changed, 48 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/vc4/vc4_plane.c 
> > > > b/drivers/gpu/drm/vc4/vc4_plane.c
> > > > index 71d44c357d35..61ed60841cd6 100644
> > > > --- a/drivers/gpu/drm/vc4/vc4_plane.c
> > > > +++ b/drivers/gpu/drm/vc4/vc4_plane.c
> > > > @@ -258,6 +258,49 @@ static u32 vc4_get_scl_field(struct 
> > > > drm_plane_state *state, int plane)
> > > > }
> > > >  }
> > > >  
> > > > +static int vc4_plane_underscan_adj(struct drm_plane_state *pstate)
> > > > +{
> > > > +   struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
> > > > +   struct drm_connector_state *conn_state = NULL;
> > > > +   struct drm_connector *conn;
> > > > +   struct drm_crtc_state *crtc_state;
> > > > +   int i;
> > > > +
> > > > +   for_each_new_connector_in_state(pstate->state, conn, 
> > > > conn_state, i) {
> > > > +   if (conn_state->crtc == pstate->crtc)
> > > > +   break;
> > > > +   }
> > > > +
> > > > +   if (i == pstate->state->num_connector)
> > > > +   return 0;
> > > > +
> > > > +   if (conn_state->underscan.mode != DRM_UNDERSCAN_ON)
> > > > +   return 0;
> > > > +
> > > > +   crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
> > > > +  pstate->crtc);
> > > > +
> > > > +   if (conn_state->underscan.hborder >= crtc_state->mode.hdisplay 
> > > > ||
> > > > +   conn_state->underscan.vborder >= crtc_state->mode.vdisplay)
> > > > +   return -EINVAL;
> > > 
> > > border * 2 ?  
> > 
> > Oops, indeed. I'll fix that.
> >   
> > >   
> > > > +
> > > > +   vc4_pstate->crtc_x += conn_state->underscan.hborder;
> > > > +   vc4_pstate->crtc_y += conn_state->underscan.vborder;
> > > > +   vc4_pstate->crtc_w = (vc4_pstate->crtc_w *
> > > > + (crtc_state->mode.hdisplay -
> > > > +  (conn_state->underscan.hborder * 2))) /
> > > > +crtc_state->mode.hdisplay;
> > > > +   vc4_pstate->crtc_h = (vc4_pstate->crtc_h *
> > > > + (crtc_state->mode.vdisplay -
> > > > +  (conn_state->underscan.vborder * 2))) /
> > > > +crtc_state->mode.vdisplay;
> > > 
> > > So you're now scaling all planes? The code seems to reject scaling for
> > > the cursor plane, how are you dealing with that? Or just no cursor
> > > allowed when underscanning?  
> > 
> > No, I just didn't test with a cursor plane. We should probably avoid
> > scaling the cursor plane and just adjust its position. Eric any opinion
> > on that?  
> 
> I don't think you can just not scale it. The user asked for the cursor
> to be at a specific place with a specific size. Can't just ignore
> that and do something else. Also eg. i915 would definitely scale the
> cursor since we'd just scale the entire crtc instead of scaling
> individual planes. Different drivers doing different things wouldn't
> be good.

Except in our case the scaling takes place before the composition, so
we don't have a choice.

> 
> >   
> > > 
> > > I'm also wondering if there's any way we can reconcile these border
> > > props with the scaling mode prop, should we ever wish to expose
> > > these props on connectors that already have the scaling mode prop.  
> > 
> > Nouveau seems to expose both, and I don't see why we couldn't.  
> 
> First of all the interaction of these borders with panels that have
> a fixed mode would need to be properly specified. Do we apply the
> borders against the user mode or the panel's native mode?

Hm, I'm a bit lost. Isn't crtc_state->mode supposed to contain the mode
that is about to be applied, no matter if it's a usermode or one of the
mode returned by the display? 

> I guess
> the latter would be a bit easier (would also match how the TV 

Re: [PATCH v2 2/4] drm/vc4: Take underscan setup into account when updating planes

2018-05-11 Thread Boris Brezillon
On Fri, 11 May 2018 20:29:48 +0300
Ville Syrjälä  wrote:

> On Fri, May 11, 2018 at 07:12:21PM +0200, Boris Brezillon wrote:
> > On Fri, 11 May 2018 19:54:02 +0300
> > Ville Syrjälä  wrote:
> >   
> > > On Fri, May 11, 2018 at 05:52:56PM +0200, Boris Brezillon wrote:  
> > > > On Fri, 11 May 2018 18:34:50 +0300
> > > > Ville Syrjälä  wrote:
> > > > 
> > > > > On Fri, May 11, 2018 at 04:59:17PM +0200, Boris Brezillon wrote:
> > > > > > Applying an underscan setup is just a matter of scaling all planes
> > > > > > appropriately and adjusting the CRTC X/Y offset to account for the
> > > > > > horizontal and vertical border.
> > > > > > 
> > > > > > Create an vc4_plane_underscan_adj() function doing that and call it 
> > > > > > from
> > > > > > vc4_plane_setup_clipping_and_scaling() so that we are ready to 
> > > > > > attach
> > > > > > underscan properties to the HDMI connector.
> > > > > > 
> > > > > > Signed-off-by: Boris Brezillon 
> > > > > > ---
> > > > > > Changes in v2:
> > > > > > - Take changes on hborder/vborder meaning into account
> > > > > > ---
> > > > > >  drivers/gpu/drm/vc4/vc4_plane.c | 49 
> > > > > > -
> > > > > >  1 file changed, 48 insertions(+), 1 deletion(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/vc4/vc4_plane.c 
> > > > > > b/drivers/gpu/drm/vc4/vc4_plane.c
> > > > > > index 71d44c357d35..61ed60841cd6 100644
> > > > > > --- a/drivers/gpu/drm/vc4/vc4_plane.c
> > > > > > +++ b/drivers/gpu/drm/vc4/vc4_plane.c
> > > > > > @@ -258,6 +258,49 @@ static u32 vc4_get_scl_field(struct 
> > > > > > drm_plane_state *state, int plane)
> > > > > > }
> > > > > >  }
> > > > > >  
> > > > > > +static int vc4_plane_underscan_adj(struct drm_plane_state *pstate)
> > > > > > +{
> > > > > > +   struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
> > > > > > +   struct drm_connector_state *conn_state = NULL;
> > > > > > +   struct drm_connector *conn;
> > > > > > +   struct drm_crtc_state *crtc_state;
> > > > > > +   int i;
> > > > > > +
> > > > > > +   for_each_new_connector_in_state(pstate->state, conn, 
> > > > > > conn_state, i) {
> > > > > > +   if (conn_state->crtc == pstate->crtc)
> > > > > > +   break;
> > > > > > +   }
> > > > > > +
> > > > > > +   if (i == pstate->state->num_connector)
> > > > > > +   return 0;
> > > > > > +
> > > > > > +   if (conn_state->underscan.mode != DRM_UNDERSCAN_ON)
> > > > > > +   return 0;
> > > > > > +
> > > > > > +   crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
> > > > > > +  pstate->crtc);
> > > > > > +
> > > > > > +   if (conn_state->underscan.hborder >= crtc_state->mode.hdisplay 
> > > > > > ||
> > > > > > +   conn_state->underscan.vborder >= crtc_state->mode.vdisplay)
> > > > > > +   return -EINVAL;  
> > > > > 
> > > > > border * 2 ?
> > > > 
> > > > Oops, indeed. I'll fix that.
> > > > 
> > > > > 
> > > > > > +
> > > > > > +   vc4_pstate->crtc_x += conn_state->underscan.hborder;
> > > > > > +   vc4_pstate->crtc_y += conn_state->underscan.vborder;
> > > > > > +   vc4_pstate->crtc_w = (vc4_pstate->crtc_w *
> > > > > > + (crtc_state->mode.hdisplay -
> > > > > > +  (conn_state->underscan.hborder * 2))) /
> > > > > > +crtc_state->mode.hdisplay;
> > > > > > +   vc4_pstate->crtc_h = (vc4_pstate->crtc_h *
> > > > > > + (crtc_state->mode.vdisplay -
> > > > > > +  (conn_state->underscan.vborder * 2))) /
> > > > > > +crtc_state->mode.vdisplay;  
> > > > > 
> > > > > So you're now scaling all planes? The code seems to reject scaling for
> > > > > the cursor plane, how are you dealing with that? Or just no cursor
> > > > > allowed when underscanning?
> > > > 
> > > > No, I just didn't test with a cursor plane. We should probably avoid
> > > > scaling the cursor plane and just adjust its position. Eric any opinion
> > > > on that?
> > > 
> > > I don't think you can just not scale it. The user asked for the cursor
> > > to be at a specific place with a specific size. Can't just ignore
> > > that and do something else. Also eg. i915 would definitely scale the
> > > cursor since we'd just scale the entire crtc instead of scaling
> > > individual planes. Different drivers doing different things wouldn't
> > > be good.  
> > 
> > Except in our case the scaling takes place before the composition, so
> > we don't have a choice.  
> 
> The choice is to either do what userspace asked, or return an error.

Come on! If we can't use underscan when there's a cursor plane enabled
this feature is pretty much useless. But let's take a real use case to
show you how negligible the lack of scaling on the cursor plane 

Re: [PATCH 00/21] GFXv9/Vega10 support for KFD

2018-05-11 Thread Oded Gabbay
On Wed, Apr 11, 2018 at 12:32 AM, Felix Kuehling  wrote:
> This patch series adds support for GFXv9 GPUs to KFD. In this series it
> enables support for Vega10. Raven support requires some extra work that
> will follow shortly, but Raven support is already included and I didn't
> go out of my way to keep it out.
>
> Felix Kuehling (19):
>   drm/amdgpu: Remove unused interface from kfd2kgd interface
>   drm/amd: Update GFXv9 SDMA MQD structure
>   drm/amdgpu: Add GFXv9 TLB invalidation packet definition
>   drm/amdgpu: Add GFXv9 kfd2kgd interface functions
>   drm/amdgpu: Add doorbell routing info to kgd2kfd_shared_resources
>   drm/amdkfd: Make doorbell size ASIC-dependent
>   drm/amdkfd: Implement doorbell allocation for SOC15
>   drm/amdkfd: Move packet writer functions into ASIC-specific file
>   drm/amdkfd: Add GFXv9 PM4 packet writer functions
>   drm/amdkfd: Add GFXv9 MQD manager
>   drm/amdkfd: Add GFXv9 device queue manager
>   drm/amdkfd: Add SOC15 interrupt processing support
>   drm/amdkfd: Fix goto usage
>   drm/amdkfd: Fix kernel queue rollback_packet
>   drm/amdkfd: Add 64-bit doorbell and wptr support to kernel queue
>   drm/amdkfd: Remove limit on number of GPUs (follow-up)
>   drm/amdkfd: Support flat memory apertures for GFXv9
>   drm/amdkfd: Add GFXv9 CWSR trap handler
>   drm/amdkfd: Add Vega10 topology and device info
>
> Harish Kasiviswanathan (1):
>   drm/amdkfd: Clean up KFD_MMAP_ offset handling
>
> welu (1):
>   drm/amdkfd: Try to enable atomics for all GPUs
>
>  MAINTAINERS|2 +
>  drivers/gpu/drm/amd/amdgpu/Makefile|3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |   26 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  |   10 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  |   10 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c  | 1043 ++
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |1 +
>  drivers/gpu/drm/amd/amdgpu/soc15d.h|5 +
>  drivers/gpu/drm/amd/amdkfd/Makefile|   10 +-
>  .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm  | 1495 
> 
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c   |   42 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_crat.c  |   11 +
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c|   89 +-
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  102 +-
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |2 +
>  .../drm/amd/amdkfd/kfd_device_queue_manager_v9.c   |   84 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c  |   65 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c|2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c   |  119 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c|   84 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c  |   39 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h  |7 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c  |9 +
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c   |  340 +
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c   |  319 +
>  drivers/gpu/drm/amd/amdkfd/kfd_module.c|5 +
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c   |3 +
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c|  443 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c|  385 +
>  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h|  583 
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  106 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c   |   40 +-
>  .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |   12 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c  |6 +
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.h  |1 +
>  drivers/gpu/drm/amd/amdkfd/soc15_int.h |   47 +
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h|   20 +-
>  drivers/gpu/drm/amd/include/v9_structs.h   |   48 +-
>  39 files changed, 5118 insertions(+), 501 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/soc15_int.h
>
> --
> 2.7.4
>

Series is:
Reviewed-by: Oded Gabbay 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 16/21] drm/amdkfd: Add 64-bit doorbell and wptr support to kernel queue

2018-05-11 Thread Oded Gabbay
applied this patch instead of original, thanks.

On Wed, Apr 25, 2018 at 12:42 AM, Felix Kuehling  wrote:
> A minor update to this patch is attached. The rest of the series is
> unchanged and rebased cleanly on 4.17-rc2 on my system.
>
> Regards,
>   Felix
>
>
> On 2018-04-10 05:33 PM, Felix Kuehling wrote:
>> Signed-off-by: Felix Kuehling 
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 +
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 25 
>> +--
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |  7 ++-
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c |  9 
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  9 
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c  |  9 
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
>>  7 files changed, 63 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> index 36c9269e..5d7 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> @@ -214,6 +214,16 @@ void write_kernel_doorbell(void __iomem *db, u32 value)
>>   }
>>  }
>>
>> +void write_kernel_doorbell64(void __iomem *db, u64 value)
>> +{
>> + if (db) {
>> + WARN(((unsigned long)db & 7) != 0,
>> +  "Unaligned 64-bit doorbell");
>> + writeq(value, (u64 __iomem *)db);
>> + pr_debug("writing %llu to doorbell address 0x%p\n", value, 
>> db);+}
>> +}
>> +
>>  unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
>>   struct kfd_process *process,
>>   unsigned int doorbell_id)
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> index 9f38161..476951d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct 
>> kfd_dev *dev,
>>   kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
>>   kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
>>
>> - retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
>> + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
>>   >wptr_mem);
>>
>>   if (retval != 0)
>> @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>>   size_t available_size;
>>   size_t queue_size_dwords;
>>   uint32_t wptr, rptr;
>> + uint64_t wptr64;
>>   unsigned int *queue_address;
>>
>>   /* When rptr == wptr, the buffer is empty.
>> @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>>* the opposite. So we can only use up to queue_size_dwords - 1 dwords.
>>*/
>>   rptr = *kq->rptr_kernel;
>> - wptr = *kq->wptr_kernel;
>> + wptr = kq->pending_wptr;
>> + wptr64 = kq->pending_wptr64;
>>   queue_address = (unsigned int *)kq->pq_kernel_addr;
>>   queue_size_dwords = kq->queue->properties.queue_size / 4;
>>
>> @@ -246,11 +248,13 @@ static int acquire_packet_buffer(struct kernel_queue 
>> *kq,
>>   while (wptr > 0) {
>>   queue_address[wptr] = kq->nop_packet;
>>   wptr = (wptr + 1) % queue_size_dwords;
>> + wptr64++;
>>   }
>>   }
>>
>>   *buffer_ptr = _address[wptr];
>>   kq->pending_wptr = wptr + packet_size_in_dwords;
>> + kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
>>
>>   return 0;
>>
>> @@ -272,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq)
>>   pr_debug("\n");
>>  #endif
>>
>> - *kq->wptr_kernel = kq->pending_wptr;
>> - write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
>> - kq->pending_wptr);
>> + kq->ops_asic_specific.submit_packet(kq);
>>  }
>>
>>  static void rollback_packet(struct kernel_queue *kq)
>>  {
>> - kq->pending_wptr = *kq->wptr_kernel;
>> + if (kq->dev->device_info->doorbell_size == 8) {
>> + kq->pending_wptr64 = *kq->wptr64_kernel;
>> + kq->pending_wptr = *kq->wptr_kernel %
>> + (kq->queue->properties.queue_size / 4);
>> + } else {
>> + kq->pending_wptr = *kq->wptr_kernel;
>> + }
>>  }
>>
>>  struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
>> @@ -310,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev 
>> *dev,
>>   case CHIP_HAWAII:
>>   kernel_queue_init_cik(>ops_asic_specific);
>>   break;
>> +
>> + case CHIP_VEGA10:
>> + case CHIP_RAVEN:
>> + kernel_queue_init_v9(>ops_asic_specific);
>> + break;
>>   default:
>>   WARN(1, "Unexpected 

Re: [PATCH v2 2/4] drm/vc4: Take underscan setup into account when updating planes

2018-05-11 Thread Ville Syrjälä
On Fri, May 11, 2018 at 09:47:49PM +0200, Boris Brezillon wrote:
> On Fri, 11 May 2018 20:29:48 +0300
> Ville Syrjälä  wrote:
> 
> > On Fri, May 11, 2018 at 07:12:21PM +0200, Boris Brezillon wrote:
> > > On Fri, 11 May 2018 19:54:02 +0300
> > > Ville Syrjälä  wrote:
> > >   
> > > > On Fri, May 11, 2018 at 05:52:56PM +0200, Boris Brezillon wrote:  
> > > > > On Fri, 11 May 2018 18:34:50 +0300
> > > > > Ville Syrjälä  wrote:
> > > > > 
> > > > > > On Fri, May 11, 2018 at 04:59:17PM +0200, Boris Brezillon wrote:
> > > > > > > Applying an underscan setup is just a matter of scaling all planes
> > > > > > > appropriately and adjusting the CRTC X/Y offset to account for the
> > > > > > > horizontal and vertical border.
> > > > > > > 
> > > > > > > Create an vc4_plane_underscan_adj() function doing that and call 
> > > > > > > it from
> > > > > > > vc4_plane_setup_clipping_and_scaling() so that we are ready to 
> > > > > > > attach
> > > > > > > underscan properties to the HDMI connector.
> > > > > > > 
> > > > > > > Signed-off-by: Boris Brezillon 
> > > > > > > ---
> > > > > > > Changes in v2:
> > > > > > > - Take changes on hborder/vborder meaning into account
> > > > > > > ---
> > > > > > >  drivers/gpu/drm/vc4/vc4_plane.c | 49 
> > > > > > > -
> > > > > > >  1 file changed, 48 insertions(+), 1 deletion(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/gpu/drm/vc4/vc4_plane.c 
> > > > > > > b/drivers/gpu/drm/vc4/vc4_plane.c
> > > > > > > index 71d44c357d35..61ed60841cd6 100644
> > > > > > > --- a/drivers/gpu/drm/vc4/vc4_plane.c
> > > > > > > +++ b/drivers/gpu/drm/vc4/vc4_plane.c
> > > > > > > @@ -258,6 +258,49 @@ static u32 vc4_get_scl_field(struct 
> > > > > > > drm_plane_state *state, int plane)
> > > > > > >   }
> > > > > > >  }
> > > > > > >  
> > > > > > > +static int vc4_plane_underscan_adj(struct drm_plane_state 
> > > > > > > *pstate)
> > > > > > > +{
> > > > > > > + struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
> > > > > > > + struct drm_connector_state *conn_state = NULL;
> > > > > > > + struct drm_connector *conn;
> > > > > > > + struct drm_crtc_state *crtc_state;
> > > > > > > + int i;
> > > > > > > +
> > > > > > > + for_each_new_connector_in_state(pstate->state, conn, 
> > > > > > > conn_state, i) {
> > > > > > > + if (conn_state->crtc == pstate->crtc)
> > > > > > > + break;
> > > > > > > + }
> > > > > > > +
> > > > > > > + if (i == pstate->state->num_connector)
> > > > > > > + return 0;
> > > > > > > +
> > > > > > > + if (conn_state->underscan.mode != DRM_UNDERSCAN_ON)
> > > > > > > + return 0;
> > > > > > > +
> > > > > > > + crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
> > > > > > > +pstate->crtc);
> > > > > > > +
> > > > > > > + if (conn_state->underscan.hborder >= crtc_state->mode.hdisplay 
> > > > > > > ||
> > > > > > > + conn_state->underscan.vborder >= crtc_state->mode.vdisplay)
> > > > > > > + return -EINVAL;  
> > > > > > 
> > > > > > border * 2 ?
> > > > > 
> > > > > Oops, indeed. I'll fix that.
> > > > > 
> > > > > > 
> > > > > > > +
> > > > > > > + vc4_pstate->crtc_x += conn_state->underscan.hborder;
> > > > > > > + vc4_pstate->crtc_y += conn_state->underscan.vborder;
> > > > > > > + vc4_pstate->crtc_w = (vc4_pstate->crtc_w *
> > > > > > > +   (crtc_state->mode.hdisplay -
> > > > > > > +(conn_state->underscan.hborder * 2))) /
> > > > > > > +  crtc_state->mode.hdisplay;
> > > > > > > + vc4_pstate->crtc_h = (vc4_pstate->crtc_h *
> > > > > > > +   (crtc_state->mode.vdisplay -
> > > > > > > +(conn_state->underscan.vborder * 2))) /
> > > > > > > +  crtc_state->mode.vdisplay;  
> > > > > > 
> > > > > > So you're now scaling all planes? The code seems to reject scaling 
> > > > > > for
> > > > > > the cursor plane, how are you dealing with that? Or just no cursor
> > > > > > allowed when underscanning?
> > > > > 
> > > > > No, I just didn't test with a cursor plane. We should probably avoid
> > > > > scaling the cursor plane and just adjust its position. Eric any 
> > > > > opinion
> > > > > on that?
> > > > 
> > > > I don't think you can just not scale it. The user asked for the cursor
> > > > to be at a specific place with a specific size. Can't just ignore
> > > > that and do something else. Also eg. i915 would definitely scale the
> > > > cursor since we'd just scale the entire crtc instead of scaling
> > > > individual planes. Different drivers doing different things wouldn't
> > > > be good.  
> > > 
> > > Except in our case the scaling takes place before the composition, so
> > > we don't have a choice.  
> > 
> > The choice is to 

Re: [PATCH v2 2/4] drm/vc4: Take underscan setup into account when updating planes

2018-05-11 Thread Eric Anholt
Ville Syrjälä  writes:

> On Fri, May 11, 2018 at 07:12:21PM +0200, Boris Brezillon wrote:
>> On Fri, 11 May 2018 19:54:02 +0300
>> Ville Syrjälä  wrote:
>> 
>> > On Fri, May 11, 2018 at 05:52:56PM +0200, Boris Brezillon wrote:
>> > > On Fri, 11 May 2018 18:34:50 +0300
>> > > Ville Syrjälä  wrote:
>> > >   
>> > > > On Fri, May 11, 2018 at 04:59:17PM +0200, Boris Brezillon wrote:  
>> > > > > Applying an underscan setup is just a matter of scaling all planes
>> > > > > appropriately and adjusting the CRTC X/Y offset to account for the
>> > > > > horizontal and vertical border.
>> > > > > 
>> > > > > Create an vc4_plane_underscan_adj() function doing that and call it 
>> > > > > from
>> > > > > vc4_plane_setup_clipping_and_scaling() so that we are ready to attach
>> > > > > underscan properties to the HDMI connector.
>> > > > > 
>> > > > > Signed-off-by: Boris Brezillon 
>> > > > > ---
>> > > > > Changes in v2:
>> > > > > - Take changes on hborder/vborder meaning into account
>> > > > > ---
>> > > > >  drivers/gpu/drm/vc4/vc4_plane.c | 49 
>> > > > > -
>> > > > >  1 file changed, 48 insertions(+), 1 deletion(-)
>> > > > > 
>> > > > > diff --git a/drivers/gpu/drm/vc4/vc4_plane.c 
>> > > > > b/drivers/gpu/drm/vc4/vc4_plane.c
>> > > > > index 71d44c357d35..61ed60841cd6 100644
>> > > > > --- a/drivers/gpu/drm/vc4/vc4_plane.c
>> > > > > +++ b/drivers/gpu/drm/vc4/vc4_plane.c
>> > > > > @@ -258,6 +258,49 @@ static u32 vc4_get_scl_field(struct 
>> > > > > drm_plane_state *state, int plane)
>> > > > >  }
>> > > > >  }
>> > > > >  
>> > > > > +static int vc4_plane_underscan_adj(struct drm_plane_state *pstate)
>> > > > > +{
>> > > > > +struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
>> > > > > +struct drm_connector_state *conn_state = NULL;
>> > > > > +struct drm_connector *conn;
>> > > > > +struct drm_crtc_state *crtc_state;
>> > > > > +int i;
>> > > > > +
>> > > > > +for_each_new_connector_in_state(pstate->state, conn, 
>> > > > > conn_state, i) {
>> > > > > +if (conn_state->crtc == pstate->crtc)
>> > > > > +break;
>> > > > > +}
>> > > > > +
>> > > > > +if (i == pstate->state->num_connector)
>> > > > > +return 0;
>> > > > > +
>> > > > > +if (conn_state->underscan.mode != DRM_UNDERSCAN_ON)
>> > > > > +return 0;
>> > > > > +
>> > > > > +crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
>> > > > > +   pstate->crtc);
>> > > > > +
>> > > > > +if (conn_state->underscan.hborder >= crtc_state->mode.hdisplay 
>> > > > > ||
>> > > > > +conn_state->underscan.vborder >= crtc_state->mode.vdisplay)
>> > > > > +return -EINVAL;
>> > > > 
>> > > > border * 2 ?  
>> > > 
>> > > Oops, indeed. I'll fix that.
>> > >   
>> > > >   
>> > > > > +
>> > > > > +vc4_pstate->crtc_x += conn_state->underscan.hborder;
>> > > > > +vc4_pstate->crtc_y += conn_state->underscan.vborder;
>> > > > > +vc4_pstate->crtc_w = (vc4_pstate->crtc_w *
>> > > > > +  (crtc_state->mode.hdisplay -
>> > > > > +   (conn_state->underscan.hborder * 2))) /
>> > > > > + crtc_state->mode.hdisplay;
>> > > > > +vc4_pstate->crtc_h = (vc4_pstate->crtc_h *
>> > > > > +  (crtc_state->mode.vdisplay -
>> > > > > +   (conn_state->underscan.vborder * 2))) /
>> > > > > + crtc_state->mode.vdisplay;
>> > > > 
>> > > > So you're now scaling all planes? The code seems to reject scaling for
>> > > > the cursor plane, how are you dealing with that? Or just no cursor
>> > > > allowed when underscanning?  
>> > > 
>> > > No, I just didn't test with a cursor plane. We should probably avoid
>> > > scaling the cursor plane and just adjust its position. Eric any opinion
>> > > on that?  
>> > 
>> > I don't think you can just not scale it. The user asked for the cursor
>> > to be at a specific place with a specific size. Can't just ignore
>> > that and do something else. Also eg. i915 would definitely scale the
>> > cursor since we'd just scale the entire crtc instead of scaling
>> > individual planes. Different drivers doing different things wouldn't
>> > be good.
>> 
>> Except in our case the scaling takes place before the composition, so
>> we don't have a choice.
>
> The choice is to either do what userspace asked, or return an error.
>
>> 
>> > 
>> > >   
>> > > > 
>> > > > I'm also wondering if there's any way we can reconcile these border
>> > > > props with the scaling mode prop, should we ever wish to expose
>> > > > these props on connectors that already have the scaling mode prop.  
>> > > 
>> > > Nouveau seems to expose both, and I don't see why we couldn't.  
>> > 
>> > 

Re: [PATCH] drm/amdkfd: Integer overflows in ioctl

2018-05-11 Thread Oded Gabbay
On Tue, Apr 24, 2018 at 9:58 PM, Felix Kuehling  wrote:
> Reviewed-by: Felix Kuehling 
>
> We could probably add a sanity check for n_devices to avoid user mode
> causing excessive memory allocations in the kernel. There is no good
> reason for this to be bigger than the number of GPUs in the system. The
> maximum number of GPUs supported due to device minor limit in DRM is 128.
>
> Regards,
>   Felix
>
>
> On 2018-04-24 09:35 AM, Dan Carpenter wrote:
>> args->n_devices is a u32 that comes from the user.  The multiplication
>> could overflow on 32 bit systems possibly leading to privilege
>> escalation.
>>
>> Fixes: 5ec7e02854b3 ("drm/amdkfd: Add ioctls for GPUVM memory management")
>> Signed-off-by: Dan Carpenter dan.carpen...@oracle.com>
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index cd679cf1fd30..ce36e556da38 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -1295,8 +1295,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file 
>> *filep,
>>   return -EINVAL;
>>   }
>>
>> - devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
>> -   GFP_KERNEL);
>> + devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
>> + GFP_KERNEL);
>>   if (!devices_arr)
>>   return -ENOMEM;
>>
>> @@ -1404,8 +1404,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
>> *filep,
>>   return -EINVAL;
>>   }
>>
>> - devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
>> -   GFP_KERNEL);
>> + devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
>> + GFP_KERNEL);
>>   if (!devices_arr)
>>   return -ENOMEM;
>>
>

Thanks!
Patch applied to amdkfd-fixes

Oded
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 11/21] drm/amdkfd: Add GFXv9 MQD manager

2018-05-11 Thread Felix Kuehling
On 2018-05-11 03:06 PM, Oded Gabbay wrote:
> On Fri, May 11, 2018 at 9:15 PM, Felix Kuehling  
> wrote:
>> This patch series was meant to be applied after the userptr changes. I
>> haven't tested this without the userptr changes.
>>
>> I think your main concern about userptr is the use of GFP_NOIO. I
>> remember considering memalloc_noio_save/restore when I worked on this
>> over a year ago. I found an old email thread I had with Christian about
>> this (subject: MMU notifier deadlock on kernel 4.9):
>>
>>> memalloc_noio_save doesn't affect kmalloc directly. It sets
>>> current->flags, which is used deep inside the page allocator, after
>>> lockdep_trace_alloc(gfp) flags a lock as being used with IO enabled. The
>>> slob allocator looks at gfp_allowed_mask, but I'm not sure how safe it
>>> is to change that in a driver and it doesn't seem to be an exported
>>> symbol anyway.
>> Maybe this has changed in the mean time, but a year ago, using
>> memalloc_noio_save/restore may have prevented real deadlocks, but would
>> not shut up the lockdep warnings. Maybe this has changed in the mean
>> time. FWIW, I don't find lockdep_trace_alloc in the current kernel.
>>
>> Regards,
>>   Felix
> I'm not familiar with this API, but from reading about it, it seems a
> more robust solution then to change the GFP flags directly in each
> kmalloc from the reasons I mentioned in the original email I sent.

I agree.

> Having said that, if no one else objects and we say we will look at
> moving to that API in the future, I don't object to taking your
> patch-set as is now.

Yeah. I'll look into this.

Regards,
  Felix

>
> Oded
>
>>
>> On 2018-05-11 05:10 AM, Oded Gabbay wrote:
>>> On Wed, Apr 11, 2018 at 12:33 AM, Felix Kuehling  
>>> wrote:
 Signed-off-by: John Bridgman 
 Signed-off-by: Jay Cornwall 
 Signed-off-by: Felix Kuehling 
 ---
  drivers/gpu/drm/amd/amdkfd/Makefile |   1 +
  drivers/gpu/drm/amd/amdkfd/kfd_device.c |   2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c|   3 +
  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 443 
 
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   3 +
  5 files changed, 451 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c

 diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
 b/drivers/gpu/drm/amd/amdkfd/Makefile
 index 52b3c1b..094b591 100644
 --- a/drivers/gpu/drm/amd/amdkfd/Makefile
 +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
 @@ -30,6 +30,7 @@ amdkfd-y  := kfd_module.o kfd_device.o kfd_chardev.o 
 kfd_topology.o \
 kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
 kfd_process.o kfd_queue.o kfd_mqd_manager.o \
 kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
 +   kfd_mqd_manager_v9.o \
 kfd_kernel_queue.o kfd_kernel_queue_cik.o \
 kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
 kfd_packet_manager.o kfd_process_queue_manager.o \
 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
 b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
 index f563acb..c368ce3 100644
 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
 @@ -700,7 +700,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned 
 int size,
 if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
 return -ENOMEM;

 -   *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
 +   *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
>>> This assumes the patch in the userptr patch-set is applied. I changed
>>> it to GFP_KERNEL for now.
>>>
 if ((*mem_obj) == NULL)
 return -ENOMEM;

 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
 b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
 index ee7061e..4b8eb50 100644
 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
 @@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE 
 type,
 case CHIP_POLARIS10:
 case CHIP_POLARIS11:
 return mqd_manager_init_vi_tonga(type, dev);
 +   case CHIP_VEGA10:
 +   case CHIP_RAVEN:
 +   return mqd_manager_init_v9(type, dev);
 default:
 WARN(1, "Unexpected ASIC family %u",
  dev->device_info->asic_family);
 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
 b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
 new file mode 100644
 index 000..684054f
 --- /dev/null
 +++