Re: [PATCH 1/2] drm/amdgpu: move VM table mapping into the backend as well

2019-03-25 Thread Kuehling, Felix
The series is Reviewed-by: Felix Kuehling 

On 2019-03-25 8:22 a.m., Christian König wrote:
> Clean that up further and also fix another case where the BO
> wasn't kmapped for CPU based updates.
>
> Signed-off-by: Christian König 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 31 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  | 11 
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 20 +
>   4 files changed, 37 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index af1a7020c3ab..c9c8309a4d3f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -660,17 +660,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device 
> *adev, struct amdgpu_vm *vm,
>   if (bo->tbo.type != ttm_bo_type_kernel) {
>   amdgpu_vm_bo_moved(bo_base);
>   } else {
> - if (vm->use_cpu_for_update)
> - r = amdgpu_bo_kmap(bo, NULL);
> - else
> - r = amdgpu_ttm_alloc_gart(&bo->tbo);
> - if (r)
> - break;
> - if (bo->shadow) {
> - r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo);
> - if (r)
> - break;
> - }
> + vm->update_funcs->map_table(bo);
>   amdgpu_vm_bo_relocated(bo_base);
>   }
>   }
> @@ -752,22 +742,17 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device 
> *adev,
>   if (r)
>   return r;
>   
> - r = amdgpu_ttm_alloc_gart(&bo->tbo);
> - if (r)
> - return r;
> -
>   if (bo->shadow) {
>   r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement,
>   &ctx);
>   if (r)
>   return r;
> -
> - r = amdgpu_ttm_alloc_gart(>shadow->tbo);
> - if (r)
> - return r;
> -
>   }
>   
> + r = vm->update_funcs->map_table(bo);
> + if (r)
> + return r;
> +
>   memset(&params, 0, sizeof(params));
>   params.adev = adev;
>   params.vm = vm;
> @@ -878,12 +863,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device 
> *adev,
>   if (r)
>   return r;
>   
> - if (vm->use_cpu_for_update) {
> - r = amdgpu_bo_kmap(pt, NULL);
> - if (r)
> - goto error_free_pt;
> - }
> -
>   /* Keep a reference to the root directory to avoid
>* freeing them up in the wrong order.
>*/
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 520122be798b..3ec875c0cc76 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -215,7 +215,7 @@ struct amdgpu_vm_update_params {
>   };
>   
>   struct amdgpu_vm_update_funcs {
> -
> + int (*map_table)(struct amdgpu_bo *bo);
>   int (*prepare)(struct amdgpu_vm_update_params *p, void * owner,
>  struct dma_fence *exclusive);
>   int (*update)(struct amdgpu_vm_update_params *p,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> index 9d53982021de..5222d165abfc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> @@ -24,6 +24,16 @@
>   #include "amdgpu_object.h"
>   #include "amdgpu_trace.h"
>   
> +/**
> + * amdgpu_vm_cpu_map_table - make sure new PDs/PTs are kmapped
> + *
> + * @table: newly allocated or validated PD/PT
> + */
> +static int amdgpu_vm_cpu_map_table(struct amdgpu_bo *table)
> +{
> + return amdgpu_bo_kmap(table, NULL);
> +}
> +
>   /**
>* amdgpu_vm_cpu_prepare - prepare page table update with the CPU
>*
> @@ -110,6 +120,7 @@ static int amdgpu_vm_cpu_commit(struct 
> amdgpu_vm_update_params *p,
>   }
>   
>   const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs = {
> + .map_table = amdgpu_vm_cpu_map_table,
>   .prepare = amdgpu_vm_cpu_prepare,
>   .update = amdgpu_vm_cpu_update,
>   .commit = amdgpu_vm_cpu_commit
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> index e4bacdb44c68..4bccd69fe30d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> @@ -28,6 +28,25 @@
>   #define AMDGPU_VM_SDMA_MIN_NUM_DW   256u
>   #define AMDGPU_VM_SDMA_MAX_NUM_DW   (16u * 1024u)
>   
> +/**
> + * amdgpu_vm_sdma_map_table - make sure new PDs/PTs are GTT mapped
> + *
> + * @table: newly allocated or validated PD/PT
> + */
> +static int amdgpu_vm_sdma_map_table(struct amdgpu_bo *table)
> +{
> + int r;

Re: Kernel panic while “ modprobe amdkfd ; modprobe -r amdkfd ; 4.14.35 kernel

2019-03-25 Thread john p donnelly
Hi 

Yes. I finally found it upstream in 4.20.

Thank you for following up.

JD.

On March 25, 2019, at 5:36 PM, "Kuehling, Felix"  wrote:

On 2019-03-22 12:58 p.m., John Donnelly wrote:
> Hello ,
>
> I am investigating an issue reported by a test group concerning this driver.
> Their test loads and unloads every kernel module included in the 4.14.35
> kernel release. You don't even need an AMD platform. It occurs on any Intel
> or KVM VM instance too.
>
> Kernel panic while “  modprobe amdkfd ;  modprobe -r amdkfd  “
>
> [  329.425334]  ? __slab_free+0x9b/0x2ba
> [  329.427836]  ? process_slab+0x3c1/0x45c
> [  329.430336]  dev_printk_emit+0x4e/0x65
> [  329.432829]  __dev_printk+0x46/0x8b
> [  329.435183]  _dev_info+0x6c/0x85
> [  329.437435]  ? kfree+0x141/0x182
> [  329.439646]  kfd_module_exit+0x37/0x39 [amdkfd]
> [  329.442258]  SyS_delete_module+0x1c3/0x26f
> [  329.444722]  ? entry_SYSCALL_64_after_hwframe+0xaa/0x0
> [  329.447479]  ? entry_SYSCALL_64_after_hwframe+0xa3/0x0
> [  329.450206]  ? entry_SYSCALL_64_after_hwframe+0x9c/0x0
> [  329.452912]  ? entry_SYSCALL_64_after_hwframe+0x95/0x0
> [  329.455586]  do_syscall_64+0x79/0x1ae
> [  329.457766]  entry_SYSCALL_64_after_hwframe+0x151/0x0
> [  329.460369] RIP: 0033:0x7f1757a1b457
> [  329.462502] RSP: 002b:7ffd62ce1f48 EFLAGS: 0206 ORIG_RAX:
>
>
>
> Sometimes  the unload works but the message logged is garbage:
>
> [root@jpd-vmbase02 ~]# modprobe -r amdkfd
> [  144.449981]  hn??蟟??xn??ן??kfd: Removed module

I think this was caused by using dev_info with a kfd_device that didn't 
exist any more. It was fixed by this commit:

commit c393e9b2d51540b74e18e555df14706098dbf2cc
Author: Randy Dunlap 
Date:   Mon Nov 13 18:08:48 2017 +0200

     drm/amdkfd: fix amdkfd use-after-free GP fault

     Fix GP fault caused by dev_info() reference to a struct device*
     after the device has been freed (use after free).
     kfd_chardev_exit() frees the device so 'kfd_device' should not
     be used after calling kfd_chardev_exit().

     Signed-off-by: Randy Dunlap 
     Signed-off-by: Oded Gabbay 


>
>
> Is this something one of the team members could have possibly corrected in an
> upstream version?

In current kernels, amdkfd is no longer a separate KO. It's part of 
amdgpu now. Also see above. This bug is probably not reproducible any more.

Regards,
   Felix


>
> #define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
> #define KFD_DRIVER_DATE "20150421"
> #define KFD_DRIVER_MAJOR0
> #define KFD_DRIVER_MINOR7
> #define KFD_DRIVER_PATCHLEVEL   2
>
>
> Any advice welcome.
>
>
> Thank you,
>
> John
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: Kernel panic while “ modprobe amdkfd ; modprobe -r amdkfd ; 4.14.35 kernel

2019-03-25 Thread Kuehling, Felix
On 2019-03-22 12:58 p.m., John Donnelly wrote:
> Hello ,
>
> I am investigating an issue reported by a test group concerning this driver.
> Their test loads and unloads every kernel module included in the 4.14.35
> kernel release. You don't even need an AMD platform. It occurs on any Intel
> or KVM VM instance too.
>
> Kernel panic while “  modprobe amdkfd ;  modprobe -r amdkfd  “
>
> [  329.425334]  ? __slab_free+0x9b/0x2ba
> [  329.427836]  ? process_slab+0x3c1/0x45c
> [  329.430336]  dev_printk_emit+0x4e/0x65
> [  329.432829]  __dev_printk+0x46/0x8b
> [  329.435183]  _dev_info+0x6c/0x85
> [  329.437435]  ? kfree+0x141/0x182
> [  329.439646]  kfd_module_exit+0x37/0x39 [amdkfd]
> [  329.442258]  SyS_delete_module+0x1c3/0x26f
> [  329.444722]  ? entry_SYSCALL_64_after_hwframe+0xaa/0x0
> [  329.447479]  ? entry_SYSCALL_64_after_hwframe+0xa3/0x0
> [  329.450206]  ? entry_SYSCALL_64_after_hwframe+0x9c/0x0
> [  329.452912]  ? entry_SYSCALL_64_after_hwframe+0x95/0x0
> [  329.455586]  do_syscall_64+0x79/0x1ae
> [  329.457766]  entry_SYSCALL_64_after_hwframe+0x151/0x0
> [  329.460369] RIP: 0033:0x7f1757a1b457
> [  329.462502] RSP: 002b:7ffd62ce1f48 EFLAGS: 0206 ORIG_RAX:
>
>
>
> Sometimes  the unload works but the message logged is garbage:
>
> [root@jpd-vmbase02 ~]# modprobe -r amdkfd
> [  144.449981]  hn??蟟??xn??ן??kfd: Removed module

I think this was caused by using dev_info with a kfd_device that didn't 
exist any more. It was fixed by this commit:

commit c393e9b2d51540b74e18e555df14706098dbf2cc
Author: Randy Dunlap 
Date:   Mon Nov 13 18:08:48 2017 +0200

     drm/amdkfd: fix amdkfd use-after-free GP fault

     Fix GP fault caused by dev_info() reference to a struct device*
     after the device has been freed (use after free).
     kfd_chardev_exit() frees the device so 'kfd_device' should not
     be used after calling kfd_chardev_exit().

     Signed-off-by: Randy Dunlap 
     Signed-off-by: Oded Gabbay 
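
[Editor's note: an illustrative sketch of the crash pattern only, based on the
commit message quoted above; it is not the literal upstream diff. The point is
that kfd_chardev_exit() frees the struct device behind kfd_device, so any
dev_info() that runs after it dereferences freed memory:]

    static void __exit kfd_module_exit(void)
    {
            /* ... other teardown ... */

            /* Log while the character device (and its struct device) is
             * still alive ... */
            dev_info(kfd_device, "Removed module\n");

            /* ... and free it last. Calling dev_info(kfd_device, ...) after
             * this point is the use-after-free seen in the trace above. */
            kfd_chardev_exit();
    }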


>
>
> Is this something one of the team members could have possibly corrected in an
> upstream version?

In current kernels, amdkfd is no longer a separate KO. It's part of 
amdgpu now. Also see above. This bug is probably not reproducible any more.

Regards,
   Felix


>
> #define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
> #define KFD_DRIVER_DATE "20150421"
> #define KFD_DRIVER_MAJOR0
> #define KFD_DRIVER_MINOR7
> #define KFD_DRIVER_PATCHLEVEL   2
>
>
> Any advice welcome.
>
>
> Thank you,
>
> John
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: XGMI pstate switch initial support

2019-03-25 Thread Kuehling, Felix
I don't see any check for the memory type. As far as I can tell you'll 
power up XGMI even for system memory mappings. See inline.

On 2019-03-22 3:28 p.m., Liu, Shaoyun wrote:
> Driver votes for a low-to-high pstate switch whenever there is an outstanding
> XGMI mapping request. Driver votes for a high-to-low pstate switch when all
> outstanding XGMI mappings are terminated.
>
> Change-Id: I197501f853c47f844055c0e28c0ac00a1ff06607
> Signed-off-by: shaoyunl 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 
>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 16 +++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   | 10 ++
>   6 files changed, 56 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index ec9562d..c4c61e9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2018,6 +2018,10 @@ static void 
> amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
>   r = amdgpu_device_enable_mgpu_fan_boost();
>   if (r)
>   DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
> +
> + /*set to low pstate by default */
> + amdgpu_xgmi_set_pstate(adev, 0);
> +
>   }
>   
>   static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 220a6a7..c430e82 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -72,6 +72,8 @@ struct amdgpu_bo_va {
>   
>   /* If the mappings are cleared or filled */
>   boolcleared;
> +
> + boolis_xgmi;
>   };
>   
>   struct amdgpu_bo {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 729da1c..a7247d5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -34,6 +34,7 @@
>   #include "amdgpu_trace.h"
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_gmc.h"
> +#include "amdgpu_xgmi.h"
>   
>   /**
>* DOC: GPUVM
> @@ -2072,6 +2073,15 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct 
> amdgpu_device *adev,
>   INIT_LIST_HEAD(&bo_va->valids);
>   INIT_LIST_HEAD(&bo_va->invalids);
>   
> + if (bo && amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev))) {
> + bo_va->is_xgmi = true;

You're setting this to true even for system memory BOs that don't 
involve XGMI mappings. That means you'll power up XGMI unnecessarily in 
many cases because KFD processes always have system memory mappings that 
are mapped to all GPUs (e.g. the signal page).

Regards,
   Felix
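
[Editor's note: a minimal sketch of the kind of memory-type check suggested
above, not part of the patch under review. The helper name and the
preferred_domains test are assumptions about how such a check could look; the
idea is to count a mapping as XGMI only when the BO is VRAM backed on a peer
device in the same hive, so plain system-memory mappings do not power up the
link:]

    static bool amdgpu_vm_is_xgmi_mapping(struct amdgpu_device *adev,
                                          struct amdgpu_bo *bo)
    {
            /* System memory (GTT/CPU domains) is not accessed over XGMI,
             * so it must not bump the pstate counter. */
            if (!bo || !(bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM))
                    return false;

            /* Only VRAM of a peer device in the same hive goes over XGMI. */
            return amdgpu_xgmi_same_hive(adev,
                                         amdgpu_ttm_adev(bo->tbo.bdev));
    }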


> + mutex_lock(&adev->vm_manager.lock_pstate);
> + /* Power up XGMI if it can be potentially used */
> + if (++adev->vm_manager.xgmi_map_counter == 1)
> + amdgpu_xgmi_set_pstate(adev, 1);
> + mutex_unlock(&adev->vm_manager.lock_pstate);
> + }
> +
>   return bo_va;
>   }
>   
> @@ -2490,6 +2500,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
>   }
>   
>   dma_fence_put(bo_va->last_pt_update);
> +
> + if (bo && bo_va->is_xgmi) {
> + mutex_lock(&adev->vm_manager.lock_pstate);
> + if (--adev->vm_manager.xgmi_map_counter == 0)
> + amdgpu_xgmi_set_pstate(adev, 0);
> + mutex_unlock(&adev->vm_manager.lock_pstate);
> + }
> +
>   kfree(bo_va);
>   }
>   
> @@ -2997,6 +3015,9 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
>   
>   idr_init(&adev->vm_manager.pasid_idr);
>   spin_lock_init(&adev->vm_manager.pasid_lock);
> +
> + adev->vm_manager.xgmi_map_counter = 0;
> + mutex_init(&adev->vm_manager.lock_pstate);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 520122b..f586b38 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -324,6 +324,10 @@ struct amdgpu_vm_manager {
>*/
>   struct idr  pasid_idr;
>   spinlock_t  pasid_lock;
> +
> + /* counter of mapped memory through xgmi */
> + uint32_txgmi_map_counter;
> + struct mutexlock_pstate;
>   };
>   
>   #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) 
> ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index fcc4b05..3368347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ 

Re: [PATCH v13 14/20] drm/amdgpu, arm64: untag user pointers in amdgpu_ttm_tt_get_user_pages

2019-03-25 Thread Kuehling, Felix
On 2019-03-20 10:51 a.m., Andrey Konovalov wrote:
> This patch is a part of a series that extends arm64 kernel ABI to allow to
> pass tagged user pointers (with the top byte set to something else other
> than 0x00) as syscall arguments.
>
> amdgpu_ttm_tt_get_user_pages() uses provided user pointers for vma
> lookups, which can only by done with untagged pointers.
>
> Untag user pointers in this function.
>
> Signed-off-by: Andrey Konovalov 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 +++--
>   1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 73e71e61dc99..891b027fa33b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -751,10 +751,11 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
> struct page **pages)
>* check that we only use anonymous memory to prevent problems
>* with writeback
>*/
> - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> + unsigned long userptr = untagged_addr(gtt->userptr);
> + unsigned long end = userptr + ttm->num_pages * PAGE_SIZE;
>   struct vm_area_struct *vma;
>   
> - vma = find_vma(mm, gtt->userptr);
> + vma = find_vma(mm, userptr);
>   if (!vma || vma->vm_file || vma->vm_end < end) {
>   up_read(&mm->mmap_sem);
>   return -EPERM;

We'll need to be careful that we don't break your change when the 
following commit gets applied through drm-next for Linux 5.2:

https://cgit.freedesktop.org/~agd5f/linux/commit/?h=drm-next-5.2-wip=915d3eecfa23693bac9e54cdacf84fb4efdcc5c4

Would it make sense to apply the untagging in amdgpu_ttm_tt_set_userptr 
instead? That would avoid this conflict and I think it would clearly put 
the untagging into the user mode code path where the tagged pointer 
originates.

In amdgpu_gem_userptr_ioctl and amdgpu_amdkfd_gpuvm.c (init_user_pages) 
we also set up an MMU notifier with the (tagged) pointer from user mode. 
That should probably also use the untagged address so that MMU notifiers 
for the untagged address get correctly matched up with the right BO. I'd 
move the untagging further up the call stack to cover that. For the GEM 
case I think amdgpu_gem_userptr_ioctl would be the right place. For the 
KFD case, I'd do this in amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu.

Regards,
   Felix
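
[Editor's note: a rough, untested sketch of the alternative suggested above,
i.e. untagging once where the pointer enters the driver rather than in
amdgpu_ttm_tt_get_user_pages(); only the relevant assignment inside
amdgpu_ttm_tt_set_userptr() is shown, and the rest of the function is assumed
unchanged. Doing it there would also cover the MMU notifier registration
mentioned above, since every later user of gtt->userptr then sees the same
canonical address:]

            /* Strip any arm64 top-byte tag once, at the point where user
             * space hands the pointer in, so later vma lookups and
             * MMU-notifier matching all use the untagged address. */
            gtt->userptr = untagged_addr(addr);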

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH v2] gpu: radeon: fix a potential NULL-pointer dereference

2019-03-25 Thread Kangjie Lu
In case alloc_workqueue fails, the fix frees memory and
returns -ENOMEM to avoid potential NULL pointer dereference.

Signed-off-by: Kangjie Lu 
---
v2: use radeon_crtc_destroy to properly clean up resources as
suggested by Michel Dänzer 
---
 drivers/gpu/drm/radeon/radeon_display.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_display.c 
b/drivers/gpu/drm/radeon/radeon_display.c
index aa898c699101..3c6d3289316b 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -663,7 +663,7 @@ static const struct drm_crtc_funcs radeon_crtc_funcs = {
.page_flip_target = radeon_crtc_page_flip_target,
 };
 
-static void radeon_crtc_init(struct drm_device *dev, int index)
+static int radeon_crtc_init(struct drm_device *dev, int index)
 {
struct radeon_device *rdev = dev->dev_private;
struct radeon_crtc *radeon_crtc;
@@ -671,13 +671,18 @@ static void radeon_crtc_init(struct drm_device *dev, int 
index)
 
radeon_crtc = kzalloc(sizeof(struct radeon_crtc) + (RADEONFB_CONN_LIMIT 
* sizeof(struct drm_connector *)), GFP_KERNEL);
if (radeon_crtc == NULL)
-   return;
+   return -ENOMEM;
 
	drm_crtc_init(dev, &radeon_crtc->base, &radeon_crtc_funcs);
 
	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
radeon_crtc->crtc_id = index;
radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
+   if (!radeon_crtc->flip_queue) {
+   DRM_ERROR("failed to allocate the flip queue\n");
+   radeon_crtc_destroy(&radeon_crtc->base);
+   return -ENOMEM;
+   }
rdev->mode_info.crtcs[index] = radeon_crtc;
 
if (rdev->family >= CHIP_BONAIRE) {
@@ -706,6 +711,8 @@ static void radeon_crtc_init(struct drm_device *dev, int 
index)
radeon_atombios_init_crtc(dev, radeon_crtc);
else
radeon_legacy_init_crtc(dev, radeon_crtc);
+
+   return 0;
 }
 
 static const char *encoder_names[38] = {
@@ -1612,7 +1619,9 @@ int radeon_modeset_init(struct radeon_device *rdev)
 
/* allocate crtcs */
for (i = 0; i < rdev->num_crtc; i++) {
-   radeon_crtc_init(rdev->ddev, i);
+   ret = radeon_crtc_init(rdev->ddev, i);
+   if (ret)
+   return ret;
}
 
/* okay we should have all the bios connectors */
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 5/8] drm/amdgpu: new VM update backends

2019-03-25 Thread Kuehling, Felix
On 2019-03-25 7:38 a.m., Christian König wrote:
> Am 20.03.19 um 12:57 schrieb Kuehling, Felix:
>> As far as I can tell, the whole series is a small cleanup and big
>> refactor to enable CPU clearing of PTs without a lot of ugliness or code
>> duplication.
>
> It's a bit more than that. Key point is that I can now easily add a 
> parameter for direct submission during page fault handling :)

You mean for working around problems with CPU page table updates from 
page faults, to force all such updates through SDMA?

Regards,
   Felix


>
> Christian.
>
>> It looks good to me. I haven't reviewed all the moved SDMA
>> update code to make sure it all works correctly, but at least the
>> prepare and commit functions look sane to me.
>>
>> For this patch I have a suggestion (inline) to remove params->ib, which
>> seems redundant with params->job. That would also ripple through the
>> remaining patches.
>>
>> Other than that, the series is Reviewed-by: Felix Kuehling
>> 
>>
>> [+Kent], Look out for this patch series in an upcoming merge to
>> amd-kfd-staging. I don't think it'll cause conflicts, but has a risk of
>> regressions (like all big amdgpu_vm changes IME).
>>
>> Regards,
>>     Felix
>>
>> On 3/19/2019 8:44 AM, Christian König wrote:
>>> Separate out all functions for SDMA and CPU based page table
>>> updates into separate backends.
>>>
>>> This way we can keep most of the complexity of those from the
>>> core VM code.
>>>
>>> Signed-off-by: Christian König 
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/Makefile |   3 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |   7 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  30 ++-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  | 116 +
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 248 
>>> 
>>>    5 files changed, 401 insertions(+), 3 deletions(-)
>>>    create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>>    create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> [snip]
>>> +/**
>>> + * amdgpu_vm_sdma_prepare - prepare SDMA command submission
>>> + *
>>> + * @p: see amdgpu_vm_update_params definition
>>> + * @owner: owner we need to sync to
>>> + * @exclusive: exclusive move fence we need to sync to
>>> + *
>>> + * Returns:
>>> + * Negativ errno, 0 for success.
>>> + */
>>> +static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
>>> +  void *owner, struct dma_fence *exclusive)
>>> +{
>>> +    struct amdgpu_bo *root = p->vm->root.base.bo;
>>> +    unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
>>> +    int r;
>>> +
>>> +    r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job);
>>> +    if (r)
>>> +    return r;
>>> +
>>> +    r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
>>> +    if (r)
>>> +    return r;
>>> +
>>> +    r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>>> + owner, false);
>>> +    if (r)
>>> +    return r;
>>> +
>>> +    p->num_dw_left = ndw;
>>> +    p->ib = &p->job->ibs[0];
>> With p->job added, do we still need p->ib? We could just use
>> &p->job->ibs[0] directly, which should perform the same or be more
>> efficient since it's just a constant offset from p->job.
>>
>>
>>> +    return 0;
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_vm_sdma_commit - commit SDMA command submission
>>> + *
>>> + * @p: see amdgpu_vm_update_params definition
>>> + * @fence: resulting fence
>>> + *
>>> + * Returns:
>>> + * Negativ errno, 0 for success.
>>> + */
>>> +static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>> + struct dma_fence **fence)
>>> +{
>>> +    struct amdgpu_bo *root = p->vm->root.base.bo;
>>> +    struct amdgpu_ring *ring;
>>> +    struct dma_fence *f;
>>> +    int r;
>>> +
>>> +    ring = container_of(p->vm->entity.rq->sched, struct 
>>> amdgpu_ring, sched);
>>> +
>>> +    WARN_ON(p->ib->length_dw == 0);
>>> +    amdgpu_ring_pad_ib(ring, p->ib);
>>> +    WARN_ON(p->ib->length_dw > p->num_dw_left);
>>> +    r = amdgpu_job_submit(p->job, &p->vm->entity,
>>> +  AMDGPU_FENCE_OWNER_VM, &f);
>>> +    if (r)
>>> +    goto error;
>>> +
>>> +    amdgpu_bo_fence(root, f, true);
>>> +    if (fence)
>>> +    swap(*fence, f);
>>> +    dma_fence_put(f);
>>> +    return 0;
>>> +
>>> +error:
>>> +    amdgpu_job_free(p->job);
>>> +    return r;
>>> +}
>>> +
>>> +
>>> +/**
>>> + * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
>>> + *
>>> + * @p: see amdgpu_vm_update_params definition
>>> + * @bo: PD/PT to update
>>> + * @pe: addr of the page entry
>>> + * @count: number of page entries to copy
>>> + *
>>> + * Traces the parameters and calls the DMA function to copy the PTEs.
>>> + */
>>> +static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params 
>>> *p,
>>> + struct amdgpu_bo *bo, uint64_t pe,
>>> + unsigned count)
>>> +{
>>> +    uint64_t src = p->ib->gpu_addr;
>>> +
>>> +    src += p->num_dw_left * 4;

[PATCH] drm/amdgpu: Adjust TMR address alignment as per HW requirement

2019-03-25 Thread Liu, Shaoyun
According to the HW engineers, they prefer the TMR address to be "naturally
aligned", i.e. the start address must be an integer multiple of the TMR size.

Change-Id: Ie01b3d41e564fc8f416048e001d75edb64c045e3
Signed-off-by: shaoyunl 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 2206bb4..905cce1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -187,13 +187,13 @@ static int psp_tmr_init(struct psp_context *psp)
int ret;
 
/*
-* Allocate 3M memory aligned to 1M from Frame Buffer (local
-* physical).
+* According to HW engineer, they prefer the TMR address be "naturally
+* aligned" , e.g. the start address be an integer divide of TMR size.
 *
 * Note: this memory need be reserved till the driver
 * uninitializes.
 */
-   ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, 0x10,
+   ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, PSP_TMR_SIZE,
  AMDGPU_GEM_DOMAIN_VRAM,
  &psp->tmr_bo, &psp->tmr_mc_addr,
  &psp->tmr_buf);
 
-- 
2.7.4
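
[Editor's note: a small illustration of what "naturally aligned" means here,
i.e. the TMR start address divides evenly by the TMR size; the WARN_ON and its
placement are hypothetical and not part of the patch:]

    /* Per the old comment the TMR is 3 MB, i.e. not a power of two, so
     * natural alignment means divisibility by the size rather than a
     * simple power-of-two mask check. */
    WARN_ON(psp->tmr_mc_addr % PSP_TMR_SIZE);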

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amd/display: Remove semicolon from to_dm_plane_state definition

2019-03-25 Thread Nicholas Kazlauskas
The extra ; in the macro definition creates an empty statement
preventing any variable declarations from occurring after
any use of to_dm_plane_state(...).

Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 87ca5746f861..94b77488a0e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -57,7 +57,7 @@ struct amdgpu_hpd;
 #define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base)
 #define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, 
base)
 
-#define to_dm_plane_state(x)   container_of(x, struct dm_plane_state, base);
+#define to_dm_plane_state(x)   container_of(x, struct dm_plane_state, base)
 
 #define AMDGPU_MAX_HPD_PINS 6
 #define AMDGPU_MAX_CRTCS 6
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 3/3] drm/amdgpu: Allow changing to BOOTUP profile on smu7

2019-03-25 Thread Russell, Kent
Thanks for that confirmation Evan. I will drop this patch.

Kent

KENT RUSSELL
Sr. Software Engineer | Linux Compute Kernel
1 Commerce Valley Drive East
Markham, ON L3T 7X6
O +(1) 289-695-2122 | Ext 72122

From: Quan, Evan
Sent: Monday, March 25, 2019 10:26:54 AM
To: Russell, Kent; amd-gfx@lists.freedesktop.org
Cc: Russell, Kent
Subject: RE: [PATCH 3/3] drm/amdgpu: Allow changing to BOOTUP profile on smu7

As I know, BOOTUP profile is only available for Vega10 and later ASICs.
The BOOTUP profile for SMU7 has only some dummy settings and is not expected to 
switch to such mode.
The default power profile for SMU7 is FULLSCREEN3D(different with Vega10 and 
later ASICs).
hwmgr->workload_mask = 1 << 
hwmgr->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D];
hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;

Regards,
Evan
> -Original Message-
> From: amd-gfx  On Behalf Of
> Russell, Kent
> Sent: 2019年3月25日 20:42
> To: amd-gfx@lists.freedesktop.org
> Cc: Russell, Kent 
> Subject: [PATCH 3/3] drm/amdgpu: Allow changing to BOOTUP profile on
> smu7
>
> With SMU7 using case statements, against vega10/20 only checking for the
> CUSTOM profile, SMU7 can't set the power profile back to the
> BOOTUP_DEFAULT, while newer HWMGRs can. Add the case statement in to
> align functionality with other ASICs
>
> Change-Id: Ibc7df3b94b1a9dabcb88934e534c91209fc75967
> Signed-off-by: Kent Russell 
> ---
>  drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
> index 74d55b8fb74e..7c3eae2a34b4 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
> @@ -5005,6 +5005,7 @@ static int smu7_set_power_profile_mode(struct
> pp_hwmgr *hwmgr, long *input, uint
>hwmgr->power_profile_mode = mode;
>}
>break;
> + case PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT:
>case PP_SMC_POWER_PROFILE_FULLSCREEN3D:
>case PP_SMC_POWER_PROFILE_POWERSAVING:
>case PP_SMC_POWER_PROFILE_VIDEO:
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/3] drm/amdgpu: Stop returning EINVAL during profile switch

2019-03-25 Thread Russell, Kent
I set CUSTOM to a weird value like "1 2 3 4 5 6", then switched to the VR profile,
then switched back to CUSTOM (just with "echo 6" to the profile file), and it kept my
custom values. Before this patch, it throws EINVAL but still switches to
CUSTOM with the saved values, on both vega10 and vega20.

Kent

KENT RUSSELL
Sr. Software Engineer | Linux Compute Kernel
1 Commerce Valley Drive East
Markham, ON L3T 7X6
O +(1) 289-695-2122 | Ext 72122

From: Quan, Evan
Sent: Monday, March 25, 2019 10:14:03 AM
To: Russell, Kent; amd-gfx@lists.freedesktop.org
Cc: Russell, Kent
Subject: RE: [PATCH 1/3] drm/amdgpu: Stop returning EINVAL during profile switch

How can you tell whether it was already applied before or 1st time to switch to 
compute mode?
For the latter case, other settings/parameters do need.

Regards,
Evan
> -Original Message-
> From: amd-gfx  On Behalf Of
> Russell, Kent
> Sent: 2019年3月25日 20:42
> To: amd-gfx@lists.freedesktop.org
> Cc: Russell, Kent 
> Subject: [PATCH 1/3] drm/amdgpu: Stop returning EINVAL during profile
> switch
>
> Don't return an error after switching to the CUSTOM profile, if there aren't
> any CUSTOM values passed in. The CUSTOM profile is already applied, so if
> no other arguments are passed, just return instead of throwing -EINVAL
> when successful
>
> Change-Id: I114cc9783226ee9ebb146863897e951527a85e20
> Signed-off-by: Kent Russell 
> ---
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 6 +-
> drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 5 -
>  2 files changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index ed6c638700f5..a98b927282ac 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -4911,7 +4911,11 @@ static int vega10_set_power_profile_mode(struct
> pp_hwmgr *hwmgr, long *input, ui
>1< >power_profile_mode);
>
>if (hwmgr->power_profile_mode ==
> PP_SMC_POWER_PROFILE_CUSTOM) {
> - if (size == 0 || size > 4)
> + /* If size = 0, then just return, it was applied above */
> + if (size == 0)
> + return 0;
> +
> + if (size != 4)
>return -EINVAL;
>
>data->custom_profile_mode[0] = busy_set_point = input[0];
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> index 664544e7fcdc..225b2102c82c 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> @@ -3828,8 +3828,11 @@ static int vega20_set_power_profile_mode(struct
> pp_hwmgr *hwmgr, long *input, ui
>}
>
>if (hwmgr->power_profile_mode ==
> PP_SMC_POWER_PROFILE_CUSTOM) {
> - if (size < 10)
> + if (size < 10 && size != 0)
>return -EINVAL;
> + /* If size = 0, then just return, it was applied above */
> + if (size == 0)
> + return 0;
>
>result = vega20_get_activity_monitor_coeff(hwmgr,
>(uint8_t *)(&activity_monitor),
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH 3/3] drm/amdgpu: Allow changing to BOOTUP profile on smu7

2019-03-25 Thread Quan, Evan
As far as I know, the BOOTUP profile is only available for Vega10 and later ASICs.
The BOOTUP profile for SMU7 has only some dummy settings, and the driver is not
expected to switch to such a mode.
The default power profile for SMU7 is FULLSCREEN3D (different from Vega10 and
later ASICs).
hwmgr->workload_mask = 1 << 
hwmgr->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D];
hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;

Regards,
Evan
> -Original Message-
> From: amd-gfx  On Behalf Of
> Russell, Kent
> Sent: 2019年3月25日 20:42
> To: amd-gfx@lists.freedesktop.org
> Cc: Russell, Kent 
> Subject: [PATCH 3/3] drm/amdgpu: Allow changing to BOOTUP profile on
> smu7
> 
> With SMU7 using case statements, against vega10/20 only checking for the
> CUSTOM profile, SMU7 can't set the power profile back to the
> BOOTUP_DEFAULT, while newer HWMGRs can. Add the case statement in to
> align functionality with other ASICs
> 
> Change-Id: Ibc7df3b94b1a9dabcb88934e534c91209fc75967
> Signed-off-by: Kent Russell 
> ---
>  drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
> index 74d55b8fb74e..7c3eae2a34b4 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
> @@ -5005,6 +5005,7 @@ static int smu7_set_power_profile_mode(struct
> pp_hwmgr *hwmgr, long *input, uint
>   hwmgr->power_profile_mode = mode;
>   }
>   break;
> + case PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT:
>   case PP_SMC_POWER_PROFILE_FULLSCREEN3D:
>   case PP_SMC_POWER_PROFILE_POWERSAVING:
>   case PP_SMC_POWER_PROFILE_VIDEO:
> --
> 2.17.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH 1/3] drm/amdgpu: Stop returning EINVAL during profile switch

2019-03-25 Thread Quan, Evan
How can you tell whether it was already applied before, or whether this is the
first time switching to the compute mode?
For the latter case, the other settings/parameters are needed.

Regards,
Evan
> -Original Message-
> From: amd-gfx  On Behalf Of
> Russell, Kent
> Sent: 2019年3月25日 20:42
> To: amd-gfx@lists.freedesktop.org
> Cc: Russell, Kent 
> Subject: [PATCH 1/3] drm/amdgpu: Stop returning EINVAL during profile
> switch
> 
> Don't return an error after switching to the CUSTOM profile, if there aren't
> any CUSTOM values passed in. The CUSTOM profile is already applied, so if
> no other arguments are passed, just return instead of throwing -EINVAL
> when successful
> 
> Change-Id: I114cc9783226ee9ebb146863897e951527a85e20
> Signed-off-by: Kent Russell 
> ---
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 6 +-
> drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 5 -
>  2 files changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index ed6c638700f5..a98b927282ac 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -4911,7 +4911,11 @@ static int vega10_set_power_profile_mode(struct
> pp_hwmgr *hwmgr, long *input, ui
>   1< >power_profile_mode);
> 
>   if (hwmgr->power_profile_mode ==
> PP_SMC_POWER_PROFILE_CUSTOM) {
> - if (size == 0 || size > 4)
> + /* If size = 0, then just return, it was applied above */
> + if (size == 0)
> + return 0;
> +
> + if (size != 4)
>   return -EINVAL;
> 
>   data->custom_profile_mode[0] = busy_set_point = input[0];
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> index 664544e7fcdc..225b2102c82c 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> @@ -3828,8 +3828,11 @@ static int vega20_set_power_profile_mode(struct
> pp_hwmgr *hwmgr, long *input, ui
>   }
> 
>   if (hwmgr->power_profile_mode ==
> PP_SMC_POWER_PROFILE_CUSTOM) {
> - if (size < 10)
> + if (size < 10 && size != 0)
>   return -EINVAL;
> + /* If size = 0, then just return, it was applied above */
> + if (size == 0)
> + return 0;
> 
>   result = vega20_get_activity_monitor_coeff(hwmgr,
>   (uint8_t *)(&activity_monitor),
> --
> 2.17.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v13 09/20] net, arm64: untag user pointers in tcp_zerocopy_receive

2019-03-25 Thread Kevin Brodsky

On 22/03/2019 12:04, Catalin Marinas wrote:

On Wed, Mar 20, 2019 at 03:51:23PM +0100, Andrey Konovalov wrote:

This patch is a part of a series that extends arm64 kernel ABI to allow to
pass tagged user pointers (with the top byte set to something else other
than 0x00) as syscall arguments.

tcp_zerocopy_receive() uses provided user pointers for vma lookups, which
can only by done with untagged pointers.

Untag user pointers in this function.

Signed-off-by: Andrey Konovalov 
---
  net/ipv4/tcp.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6baa6dc1b13b..855a1f68c1ea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1761,6 +1761,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
if (address & (PAGE_SIZE - 1) || address != zc->address)
return -EINVAL;
  
+	address = untagged_addr(address);

+
if (sk->sk_state == TCP_LISTEN)
return -ENOTCONN;

I don't think we need this patch if we stick to Vincenzo's ABI
restrictions. Can zc->address be an anonymous mmap()? My understanding
of TCP_ZEROCOPY_RECEIVE is that this is an mmap() on a socket, so user
should not tag such pointer.


Good point, I hadn't looked into the interface properly. The `vma->vm_ops !=
&tcp_vm_ops` check just below makes sure that the mapping is specifically tied to a
TCP socket, so definitely not included in the ABI relaxation.



We want to allow tagged pointers to work transparently only for heap and
stack, hence the restriction to anonymous mmap() and those addresses
below sbrk(0).


That's not quite true: in the ABI relaxation v2, all private mappings that are either 
anonymous or backed by a regular file are included. The scope is quite a bit larger 
than heap and stack, even though this is what we're primarily interested in for now.


Kevin
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v13 17/20] media/v4l2-core, arm64: untag user pointers in videobuf_dma_contig_user_get

2019-03-25 Thread Kevin Brodsky

On 22/03/2019 16:07, Catalin Marinas wrote:

On Wed, Mar 20, 2019 at 03:51:31PM +0100, Andrey Konovalov wrote:

This patch is a part of a series that extends arm64 kernel ABI to allow to
pass tagged user pointers (with the top byte set to something else other
than 0x00) as syscall arguments.

videobuf_dma_contig_user_get() uses provided user pointers for vma
lookups, which can only by done with untagged pointers.

Untag the pointers in this function.

Signed-off-by: Andrey Konovalov 
---
  drivers/media/v4l2-core/videobuf-dma-contig.c | 9 +
  1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/media/v4l2-core/videobuf-dma-contig.c 
b/drivers/media/v4l2-core/videobuf-dma-contig.c
index e1bf50df4c70..8a1ddd146b17 100644
--- a/drivers/media/v4l2-core/videobuf-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf-dma-contig.c
@@ -160,6 +160,7 @@ static void videobuf_dma_contig_user_put(struct 
videobuf_dma_contig_memory *mem)
  static int videobuf_dma_contig_user_get(struct videobuf_dma_contig_memory 
*mem,
struct videobuf_buffer *vb)
  {
+   unsigned long untagged_baddr = untagged_addr(vb->baddr);
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long prev_pfn, this_pfn;
@@ -167,22 +168,22 @@ static int videobuf_dma_contig_user_get(struct 
videobuf_dma_contig_memory *mem,
unsigned int offset;
int ret;
  
-	offset = vb->baddr & ~PAGE_MASK;

+   offset = untagged_baddr & ~PAGE_MASK;
mem->size = PAGE_ALIGN(vb->size + offset);
ret = -EINVAL;
  
  	down_read(&mm->mmap_sem);
  
-	vma = find_vma(mm, vb->baddr);

+   vma = find_vma(mm, untagged_baddr);
if (!vma)
goto out_up;
  
-	if ((vb->baddr + mem->size) > vma->vm_end)

+   if ((untagged_baddr + mem->size) > vma->vm_end)
goto out_up;
  
  	pages_done = 0;

prev_pfn = 0; /* kill warning */
-   user_address = vb->baddr;
+   user_address = untagged_baddr;
  
  	while (pages_done < (mem->size >> PAGE_SHIFT)) {

	ret = follow_pfn(vma, user_address, &this_pfn);

I don't think vb->baddr here is anonymous mmap() but worth checking the
call paths.


I spent some time on this, I didn't find any restriction on the kind of mapping 
that's allowed here. The API regarding V4L2_MEMORY_USERPTR doesn't seem to say 
anything about that either [0] [1]. It's probably best to ask the V4L2 maintainers.


Kevin

[0] https://www.kernel.org/doc/html/latest/media/uapi/v4l/vidioc-qbuf.html
[1] https://www.kernel.org/doc/html/latest/media/uapi/v4l/userp.html
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v13 14/20] drm/amdgpu, arm64: untag user pointers in amdgpu_ttm_tt_get_user_pages

2019-03-25 Thread Kevin Brodsky

On 22/03/2019 15:59, Catalin Marinas wrote:

On Wed, Mar 20, 2019 at 03:51:28PM +0100, Andrey Konovalov wrote:

This patch is a part of a series that extends arm64 kernel ABI to allow to
pass tagged user pointers (with the top byte set to something else other
than 0x00) as syscall arguments.

amdgpu_ttm_tt_get_user_pages() uses provided user pointers for vma
lookups, which can only by done with untagged pointers.

Untag user pointers in this function.

Signed-off-by: Andrey Konovalov 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 73e71e61dc99..891b027fa33b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -751,10 +751,11 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
struct page **pages)
 * check that we only use anonymous memory to prevent problems
 * with writeback
 */
-   unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
+   unsigned long userptr = untagged_addr(gtt->userptr);
+   unsigned long end = userptr + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma;
  
-		vma = find_vma(mm, gtt->userptr);

+   vma = find_vma(mm, userptr);
if (!vma || vma->vm_file || vma->vm_end < end) {
up_read(&mm->mmap_sem);
return -EPERM;

I tried to track this down but I failed to see whether user could
provide a tagged pointer here (under the restrictions as per Vincenzo's
ABI document).


->userptr is set by radeon_ttm_tt_set_userptr(), itself called from 
radeon_gem_userptr_ioctl(). Any page-aligned value is allowed.


Kevin
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/3] drm/amdgpu: Allow changing to BOOTUP profile on smu7

2019-03-25 Thread Russell, Kent
SMU7 uses case statements, whereas vega10/20 only check for the
CUSTOM profile, so SMU7 can't set the power profile back to
BOOTUP_DEFAULT while newer HWMGRs can. Add the case statement to
align the functionality with other ASICs.

Change-Id: Ibc7df3b94b1a9dabcb88934e534c91209fc75967
Signed-off-by: Kent Russell 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 74d55b8fb74e..7c3eae2a34b4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -5005,6 +5005,7 @@ static int smu7_set_power_profile_mode(struct pp_hwmgr 
*hwmgr, long *input, uint
hwmgr->power_profile_mode = mode;
}
break;
+   case PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT:
case PP_SMC_POWER_PROFILE_FULLSCREEN3D:
case PP_SMC_POWER_PROFILE_POWERSAVING:
case PP_SMC_POWER_PROFILE_VIDEO:
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/3] drm/amdgpu: Allow changing to CUSTOM profile on smu7

2019-03-25 Thread Russell, Kent
Allow changing to the CUSTOM profile without requiring the
parameters to be passed in each time. Store the values in
the smu7_profiling table, since it is defined here anyway.

Change-Id: I6c5e3a1487e12410a6a7670a5cf1a6599253344d
Signed-off-by: Kent Russell 
---
 .../gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c  | 27 +++
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 83d3d935f3ac..74d55b8fb74e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -77,7 +77,7 @@
 #define PCIE_BUS_CLK1
 #define TCLK(PCIE_BUS_CLK / 10)
 
-static const struct profile_mode_setting smu7_profiling[7] =
+static struct profile_mode_setting smu7_profiling[7] =
{{0, 0, 0, 0, 0, 0, 0, 0},
 {1, 0, 100, 30, 1, 0, 100, 10},
 {1, 10, 0, 30, 0, 0, 0, 0},
@@ -4984,17 +4984,22 @@ static int smu7_set_power_profile_mode(struct pp_hwmgr 
*hwmgr, long *input, uint
mode = input[size];
switch (mode) {
case PP_SMC_POWER_PROFILE_CUSTOM:
-   if (size < 8)
+   if (size < 8 && size != 0)
return -EINVAL;
-
-   tmp.bupdate_sclk = input[0];
-   tmp.sclk_up_hyst = input[1];
-   tmp.sclk_down_hyst = input[2];
-   tmp.sclk_activity = input[3];
-   tmp.bupdate_mclk = input[4];
-   tmp.mclk_up_hyst = input[5];
-   tmp.mclk_down_hyst = input[6];
-   tmp.mclk_activity = input[7];
+   /* If only CUSTOM is passed in, use the saved values */
+   if (size == 0) {
+   tmp = smu7_profiling[PP_SMC_POWER_PROFILE_CUSTOM];
+   } else {
+   tmp.bupdate_sclk = input[0];
+   tmp.sclk_up_hyst = input[1];
+   tmp.sclk_down_hyst = input[2];
+   tmp.sclk_activity = input[3];
+   tmp.bupdate_mclk = input[4];
+   tmp.mclk_up_hyst = input[5];
+   tmp.mclk_down_hyst = input[6];
+   tmp.mclk_activity = input[7];
+   smu7_profiling[PP_SMC_POWER_PROFILE_CUSTOM] = tmp;
+   }
	if (!smum_update_dpm_settings(hwmgr, &tmp)) {
	memcpy(&data->current_profile_setting, &tmp,
sizeof(struct profile_mode_setting));
hwmgr->power_profile_mode = mode;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/3] drm/amdgpu: Stop returning EINVAL during profile switch

2019-03-25 Thread Russell, Kent
Don't return an error after switching to the CUSTOM profile,
if there aren't any CUSTOM values passed in. The CUSTOM profile
is already applied, so if no other arguments are passed, just
return instead of throwing -EINVAL when successful

Change-Id: I114cc9783226ee9ebb146863897e951527a85e20
Signed-off-by: Kent Russell 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 6 +-
 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 5 -
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index ed6c638700f5..a98b927282ac 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -4911,7 +4911,11 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr 
*hwmgr, long *input, ui
1power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
-   if (size == 0 || size > 4)
+   /* If size = 0, then just return, it was applied above */
+   if (size == 0)
+   return 0;
+
+   if (size != 4)
return -EINVAL;
 
data->custom_profile_mode[0] = busy_set_point = input[0];
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 664544e7fcdc..225b2102c82c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -3828,8 +3828,11 @@ static int vega20_set_power_profile_mode(struct pp_hwmgr 
*hwmgr, long *input, ui
}
 
if (hwmgr->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
-   if (size < 10)
+   if (size < 10 && size != 0)
return -EINVAL;
+   /* If size = 0, then just return, it was applied above */
+   if (size == 0)
+   return 0;
 
result = vega20_get_activity_monitor_coeff(hwmgr,
(uint8_t *)(&activity_monitor),
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdgpu: drop the ib from the VM update parameters

2019-03-25 Thread Christian König
It is redundant with the job pointer.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  5 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 24 +++--
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 3ec875c0cc76..241e425e881d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -203,11 +203,6 @@ struct amdgpu_vm_update_params {
 */
struct amdgpu_job *job;
 
-   /**
-* @ib: indirect buffer to fill with commands
-*/
-   struct amdgpu_ib *ib;
-
/**
 * @num_dw_left: number of dw left for the IB
 */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 4bccd69fe30d..ddd181f5ed37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -78,7 +78,6 @@ static int amdgpu_vm_sdma_prepare(struct 
amdgpu_vm_update_params *p,
return r;
 
p->num_dw_left = ndw;
-   p->ib = &p->job->ibs[0];
return 0;
 }
 
@@ -95,15 +94,16 @@ static int amdgpu_vm_sdma_commit(struct 
amdgpu_vm_update_params *p,
 struct dma_fence **fence)
 {
struct amdgpu_bo *root = p->vm->root.base.bo;
+   struct amdgpu_ib *ib = p->job->ibs;
struct amdgpu_ring *ring;
struct dma_fence *f;
int r;
 
ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
 
-   WARN_ON(p->ib->length_dw == 0);
-   amdgpu_ring_pad_ib(ring, p->ib);
-   WARN_ON(p->ib->length_dw > p->num_dw_left);
+   WARN_ON(ib->length_dw == 0);
+   amdgpu_ring_pad_ib(ring, ib);
+   WARN_ON(ib->length_dw > p->num_dw_left);
	r = amdgpu_job_submit(p->job, &p->vm->entity,
	  AMDGPU_FENCE_OWNER_VM, &f);
if (r)
@@ -135,14 +135,15 @@ static void amdgpu_vm_sdma_copy_ptes(struct 
amdgpu_vm_update_params *p,
 struct amdgpu_bo *bo, uint64_t pe,
 unsigned count)
 {
-   uint64_t src = p->ib->gpu_addr;
+   struct amdgpu_ib *ib = p->job->ibs;
+   uint64_t src = ib->gpu_addr;
 
src += p->num_dw_left * 4;
 
pe += amdgpu_bo_gpu_offset(bo);
trace_amdgpu_vm_copy_ptes(pe, src, count);
 
-   amdgpu_vm_copy_pte(p->adev, p->ib, pe, src, count);
+   amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
 }
 
 /**
@@ -164,13 +165,15 @@ static void amdgpu_vm_sdma_set_ptes(struct 
amdgpu_vm_update_params *p,
uint64_t addr, unsigned count,
uint32_t incr, uint64_t flags)
 {
+   struct amdgpu_ib *ib = p->job->ibs;
+
pe += amdgpu_bo_gpu_offset(bo);
trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
if (count < 3) {
-   amdgpu_vm_write_pte(p->adev, p->ib, pe, addr | flags,
+   amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
count, incr);
} else {
-   amdgpu_vm_set_pte_pde(p->adev, p->ib, pe, addr,
+   amdgpu_vm_set_pte_pde(p->adev, ib, pe, addr,
  count, incr, flags);
}
 }
@@ -200,7 +203,7 @@ static int amdgpu_vm_sdma_update(struct 
amdgpu_vm_update_params *p,
 
do {
ndw = p->num_dw_left;
-   ndw -= p->ib->length_dw;
+   ndw -= p->job->ibs->length_dw;
 
if (ndw < 32) {
r = amdgpu_vm_sdma_commit(p, NULL);
@@ -219,7 +222,6 @@ static int amdgpu_vm_sdma_update(struct 
amdgpu_vm_update_params *p,
return r;
 
p->num_dw_left = ndw;
-   p->ib = &p->job->ibs[0];
}
 
if (!p->pages_addr) {
@@ -243,7 +245,7 @@ static int amdgpu_vm_sdma_update(struct 
amdgpu_vm_update_params *p,
 
/* Put the PTEs at the end of the IB. */
p->num_dw_left -= nptes * 2;
-   pte = (uint64_t *)&(p->ib->ptr[p->num_dw_left]);
+   pte = (uint64_t *)&(p->job->ibs->ptr[p->num_dw_left]);
for (i = 0; i < nptes; ++i, addr += incr) {
pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr);
pte[i] |= flags;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdgpu: move VM table mapping into the backend as well

2019-03-25 Thread Christian König
Clean that up further and also fix another case where the BO
wasn't kmapped for CPU based updates.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 31 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  | 11 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 20 +
 4 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index af1a7020c3ab..c9c8309a4d3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -660,17 +660,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
} else {
-   if (vm->use_cpu_for_update)
-   r = amdgpu_bo_kmap(bo, NULL);
-   else
-   r = amdgpu_ttm_alloc_gart(&bo->tbo);
-   if (r)
-   break;
-   if (bo->shadow) {
-   r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo);
-   if (r)
-   break;
-   }
+   vm->update_funcs->map_table(bo);
amdgpu_vm_bo_relocated(bo_base);
}
}
@@ -752,22 +742,17 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
if (r)
return r;
 
-   r = amdgpu_ttm_alloc_gart(&bo->tbo);
-   if (r)
-   return r;
-
if (bo->shadow) {
	r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement,
	&ctx);
if (r)
return r;
-
-   r = amdgpu_ttm_alloc_gart(>shadow->tbo);
-   if (r)
-   return r;
-
}
 
+   r = vm->update_funcs->map_table(bo);
+   if (r)
+   return r;
+
	memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
@@ -878,12 +863,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
if (r)
return r;
 
-   if (vm->use_cpu_for_update) {
-   r = amdgpu_bo_kmap(pt, NULL);
-   if (r)
-   goto error_free_pt;
-   }
-
/* Keep a reference to the root directory to avoid
 * freeing them up in the wrong order.
 */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 520122be798b..3ec875c0cc76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -215,7 +215,7 @@ struct amdgpu_vm_update_params {
 };
 
 struct amdgpu_vm_update_funcs {
-
+   int (*map_table)(struct amdgpu_bo *bo);
int (*prepare)(struct amdgpu_vm_update_params *p, void * owner,
   struct dma_fence *exclusive);
int (*update)(struct amdgpu_vm_update_params *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 9d53982021de..5222d165abfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -24,6 +24,16 @@
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
 
+/**
+ * amdgpu_vm_cpu_map_table - make sure new PDs/PTs are kmapped
+ *
+ * @table: newly allocated or validated PD/PT
+ */
+static int amdgpu_vm_cpu_map_table(struct amdgpu_bo *table)
+{
+   return amdgpu_bo_kmap(table, NULL);
+}
+
 /**
  * amdgpu_vm_cpu_prepare - prepare page table update with the CPU
  *
@@ -110,6 +120,7 @@ static int amdgpu_vm_cpu_commit(struct 
amdgpu_vm_update_params *p,
 }
 
 const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs = {
+   .map_table = amdgpu_vm_cpu_map_table,
.prepare = amdgpu_vm_cpu_prepare,
.update = amdgpu_vm_cpu_update,
.commit = amdgpu_vm_cpu_commit
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index e4bacdb44c68..4bccd69fe30d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -28,6 +28,25 @@
 #define AMDGPU_VM_SDMA_MIN_NUM_DW  256u
 #define AMDGPU_VM_SDMA_MAX_NUM_DW  (16u * 1024u)
 
+/**
+ * amdgpu_vm_sdma_map_table - make sure new PDs/PTs are GTT mapped
+ *
+ * @table: newly allocated or validated PD/PT
+ */
+static int amdgpu_vm_sdma_map_table(struct amdgpu_bo *table)
+{
+   int r;
+
+   r = amdgpu_ttm_alloc_gart(&table->tbo);
+   if (r)
+   return r;
+
+   if (table->shadow)
+   r = amdgpu_ttm_alloc_gart(&table->shadow->tbo);
+
+   return r;
+}
+
 /**
  * amdgpu_vm_sdma_prepare - prepare SDMA command submission
  *
@@ 

Re: [PATCH] drm/amdgpu: fix vm_cpu_update hit NULL pointer

2019-03-25 Thread Christian König

Hi Monk,

well it would certainly work, but for the error handling it's just not 
the best idea to do the mapping so late.


E.g. we should do the kmap for CPU based updates, as well as the GTT map
for SDMA based updates, much earlier, to avoid rolling back all the work
again when we are interrupted by a signal, for example.


Give me a moment to write a patch for this, it's basically the same 
thing as your patch just called from an earlier point.
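
To illustrate the ordering (a minimal sketch only; amdgpu_vm_prepare_pt() is
a made-up name here, the actual patch routes this through the new
update_funcs->map_table() backend callback):

    /* Sketch: map the new PD/PT right after validation and before any
     * update work is queued, so a later signal (-ERESTARTSYS) does not
     * force us to roll back work that was already submitted. */
    static int amdgpu_vm_prepare_pt(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
    {
            struct ttm_operation_ctx ctx = { true, false };
            int r;

            r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
            if (r)
                    return r;

            /* CPU backend kmaps the BO, SDMA backend allocates GART space */
            return vm->update_funcs->map_table(bo);
    }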


Regards,
Christian.

On 24.03.19 at 04:42, Liu, Monk wrote:

Hi Christian

Why is it the wrong time? The PTE/PD BO is already reserved; some details maybe?

Thanks
/Monk
-Original Message-
From: Christian König 
Sent: Friday, March 22, 2019 11:43 PM
To: Liu, Monk ; amd-...@freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: fix vm_cpu_update hit NULL pointer

On 22.03.19 at 06:30, Monk Liu wrote:

should use amdgpu_bo_kmap, otherwise you'll hit a NULL pointer bug
with amdgpu_bo_kptr

Yeah that is a known problem. NAK to this one because that would map the BO at
the wrong time.

But in general I have a proper fix for this in the pipeline.

Christian.


Signed-off-by: Monk Liu 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 4 +++-
   1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 9d53982..1fb6295a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -76,8 +76,10 @@ static int amdgpu_vm_cpu_update(struct 
amdgpu_vm_update_params *p,
   {
unsigned int i;
uint64_t value;
+   void *ptr;
   
-	pe += (unsigned long)amdgpu_bo_kptr(bo);

+   amdgpu_bo_kmap(bo, &ptr);
+   pe += (unsigned long)ptr;
   
   	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
   




Re: [PATCH] drm/amdgpu: XGMI pstate switch initial support

2019-03-25 Thread Christian König

On 22.03.19 at 20:28, Liu, Shaoyun wrote:

The driver votes for a low-to-high pstate switch whenever there is an
outstanding XGMI mapping request, and votes for a high-to-low pstate
switch once all outstanding XGMI mappings are terminated.

Change-Id: I197501f853c47f844055c0e28c0ac00a1ff06607
Signed-off-by: shaoyunl 



+   int pstate; /*0 -- low , 1 -- high , -1 unknown*/
Maybe Alex or somebody else who is working on the power management stuff 
knows a better enum for this.
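
Purely to illustrate that suggestion, such an enum could look something like
the sketch below (names invented here, not an existing amdgpu definition):

    /* Hypothetical replacement for the raw int pstate member. */
    enum amdgpu_xgmi_pstate {
            AMDGPU_XGMI_PSTATE_UNKNOWN = -1,
            AMDGPU_XGMI_PSTATE_LOW = 0,
            AMDGPU_XGMI_PSTATE_HIGH = 1,
    };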


But at least from the VM side the patch is Reviewed-by: Christian König.


Regards,
Christian.

Re: [PATCH 5/8] drm/amdgpu: new VM update backends

2019-03-25 Thread Christian König

On 20.03.19 at 12:57, Kuehling, Felix wrote:

As far as I can tell, the whole series is a small cleanup and big
refactor to enable CPU clearing of PTs without a lot of ugliness or code
duplication.


It's a bit more than that. Key point is that I can now easily add a 
parameter for direct submission during page fault handling :)


Christian.


It looks good to me. I haven't reviewed all the moved SDMA
update code to make sure it all works correctly, but at least the
prepare and commit functions look sane to me.

For this patch I have a suggestion (inline) to remove params->ib, which
seems redundant with params->job. That would also ripple through the
remaining patches.

Other than that, the series is Reviewed-by: Felix Kuehling


[+Kent], Look out for this patch series in an upcoming merge to
amd-kfd-staging. I don't think it'll cause conflicts, but has a risk of
regressions (like all big amdgpu_vm changes IME).

Regards,
    Felix

On 3/19/2019 8:44 AM, Christian König wrote:

Separate out all functions for SDMA and CPU based page table
updates into separate backends.

This way we can keep most of the complexity of those from the
core VM code.

Signed-off-by: Christian König 
---
   drivers/gpu/drm/amd/amdgpu/Makefile |   3 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |   7 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  30 ++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  | 116 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 248 
   5 files changed, 401 insertions(+), 3 deletions(-)
   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c

[snip]

+/**
+ * amdgpu_vm_sdma_prepare - prepare SDMA command submission
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @owner: owner we need to sync to
+ * @exclusive: exclusive move fence we need to sync to
+ *
+ * Returns:
+ * Negative errno, 0 for success.
+ */
+static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
+ void *owner, struct dma_fence *exclusive)
+{
+   struct amdgpu_bo *root = p->vm->root.base.bo;
+   unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
+   int r;
+
+   r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job);
+   if (r)
+   return r;
+
+   r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
+   if (r)
+   return r;
+
+   r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
+owner, false);
+   if (r)
+   return r;
+
+   p->num_dw_left = ndw;
+   p->ib = &p->job->ibs[0];

With p->job added, do we still need p->ib? We could just use
&p->job->ibs[0] directly, which should perform the same or be more
efficient since it's just a constant offset from p->job.



+   return 0;
+}
+
+/**
+ * amdgpu_vm_sdma_commit - commit SDMA command submission
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @fence: resulting fence
+ *
+ * Returns:
+ * Negative errno, 0 for success.
+ */
+static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
+struct dma_fence **fence)
+{
+   struct amdgpu_bo *root = p->vm->root.base.bo;
+   struct amdgpu_ring *ring;
+   struct dma_fence *f;
+   int r;
+
+   ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
+
+   WARN_ON(p->ib->length_dw == 0);
+   amdgpu_ring_pad_ib(ring, p->ib);
+   WARN_ON(p->ib->length_dw > p->num_dw_left);
+   r = amdgpu_job_submit(p->job, &p->vm->entity,
+ AMDGPU_FENCE_OWNER_VM, &f);
+   if (r)
+   goto error;
+
+   amdgpu_bo_fence(root, f, true);
+   if (fence)
+   swap(*fence, f);
+   dma_fence_put(f);
+   return 0;
+
+error:
+   amdgpu_job_free(p->job);
+   return r;
+}
+
+
+/**
+ * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @bo: PD/PT to update
+ * @pe: addr of the page entry
+ * @count: number of page entries to copy
+ *
+ * Traces the parameters and calls the DMA function to copy the PTEs.
+ */
+static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
+struct amdgpu_bo *bo, uint64_t pe,
+unsigned count)
+{
+   uint64_t src = p->ib->gpu_addr;
+
+   src += p->num_dw_left * 4;
+
+   pe += amdgpu_bo_gpu_offset(bo);
+   trace_amdgpu_vm_copy_ptes(pe, src, count);
+
+   amdgpu_vm_copy_pte(p->adev, p->ib, pe, src, count);
+}
+
+/**
+ * amdgpu_vm_sdma_set_ptes - helper to call the right asic function
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @bo: PD/PT to update
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ *
+ * 

Re: [PATCH] gpu: radeon: fix a potential NULL-pointer dereference

2019-03-25 Thread Michel Dänzer

Hi Kangjie,


thanks for your patch.


On 2019-03-23 3:29 a.m., Kangjie Lu wrote:
> In case alloc_workqueue fails, the fix frees memory and
> returns to avoid potential NULL pointer dereference.
> 
> Signed-off-by: Kangjie Lu 
> ---
>  drivers/gpu/drm/radeon/radeon_display.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/drivers/gpu/drm/radeon/radeon_display.c 
> b/drivers/gpu/drm/radeon/radeon_display.c
> index aa898c699101..a31305755a77 100644
> --- a/drivers/gpu/drm/radeon/radeon_display.c
> +++ b/drivers/gpu/drm/radeon/radeon_display.c
> @@ -678,6 +678,11 @@ static void radeon_crtc_init(struct drm_device *dev, int 
> index)
>   drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
>   radeon_crtc->crtc_id = index;
>   radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
> + if (!radeon_crtc->flip_queue) {
> + DRM_ERROR("failed to allocate the flip queue\n");
> + kfree(radeon_crtc);

This would leak some memory referenced by struct drm_crtc. To solve
this, I suggest calling radeon_crtc_destroy here and making that cope
with radeon_crtc->flip_queue being NULL.


Also, I'm not sure all driver code can handle some CRTCs not
initializing. Given that, and as alloc_workqueue presumably only fails
if the system is essentially out of memory anyway, it's probably better
for radeon_crtc_init to return -ENOMEM in this case and for
radeon_modeset_init to propagate that, which will prevent the driver as
a whole from initializing.
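
A rough sketch of that suggestion (untested, just to show the shape of the
error path; radeon_crtc_init() would need to return int and
radeon_modeset_init() would have to check it):

    radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
    if (!radeon_crtc->flip_queue) {
            DRM_ERROR("failed to allocate the flip queue\n");
            /* radeon_crtc_destroy() must tolerate a NULL flip_queue
             * for this to be safe. */
            radeon_crtc_destroy(&radeon_crtc->base);
            return -ENOMEM;
    }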


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer

[PATCH 8/9] drm/syncobj: add timeline signal ioctl for syncobj v5

2019-03-25 Thread Chunming Zhou
v2: individually allocate the chain array, since chain nodes are freed independently.
v3: all existing points must already be signaled before the CPU performs the
signal operation, so add a check for that.
v4: remove v3 change and add checking to prevent out-of-order
v5: unify binary and timeline
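
For context, user space would drive the new ioctl roughly as in the sketch
below (illustrative only; it assumes the drm_syncobj_timeline_array uapi
added earlier in this series, with handles/points as user pointers plus
count_handles):

    /* Sketch: signal one point on a single timeline syncobj. */
    static int signal_timeline_point(int fd, uint32_t handle, uint64_t point)
    {
            struct drm_syncobj_timeline_array args;

            memset(&args, 0, sizeof(args));
            args.handles = (uintptr_t)&handle;
            args.points = (uintptr_t)&point;
            args.count_handles = 1;

            return drmIoctl(fd, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &args);
    }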

Signed-off-by: Chunming Zhou 
Cc: Tobias Hector 
Cc: Jason Ekstrand 
Cc: Dave Airlie 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_internal.h |  2 +
 drivers/gpu/drm/drm_ioctl.c|  2 +
 drivers/gpu/drm/drm_syncobj.c  | 73 ++
 include/uapi/drm/drm.h |  1 +
 4 files changed, 78 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dd11ae5f1eef..d9a483a5fce0 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void 
*data,
struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_private);
+int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private);
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private);
 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 92b3b7b2fd81..d337f161909c 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, 
drm_syncobj_timeline_signal_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 
DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index ee2d66e047e7..099596190845 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1183,6 +1183,79 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,
return ret;
 }
 
+int
+drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private)
+{
+   struct drm_syncobj_timeline_array *args = data;
+   struct drm_syncobj **syncobjs;
+   struct dma_fence_chain **chains;
+   uint64_t *points;
+   uint32_t i, j;
+   int ret;
+
+   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   return -EOPNOTSUPP;
+
+   if (args->pad != 0)
+   return -EINVAL;
+
+   if (args->count_handles == 0)
+   return -EINVAL;
+
+   ret = drm_syncobj_array_find(file_private,
+u64_to_user_ptr(args->handles),
+args->count_handles,
+&syncobjs);
+   if (ret < 0)
+   return ret;
+
+   points = kmalloc_array(args->count_handles, sizeof(*points),
+  GFP_KERNEL);
+   if (!points) {
+   ret = -ENOMEM;
+   goto out;
+   }
+   if (!u64_to_user_ptr(args->points)) {
+   memset(points, 0, args->count_handles * sizeof(uint64_t));
+   } else if (copy_from_user(points, u64_to_user_ptr(args->points),
+ sizeof(uint64_t) * args->count_handles)) {
+   ret = -EFAULT;
+   goto err_points;
+   }
+
+   chains = kmalloc_array(args->count_handles, sizeof(void *), GFP_KERNEL);
+   if (!chains) {
+   ret = -ENOMEM;
+   goto err_points;
+   }
+   for (i = 0; i < args->count_handles; i++) {
+   chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+   if (!chains[i]) {
+   for (j = 0; j < i; j++)
+   kfree(chains[j]);
+   ret = -ENOMEM;
+   goto err_chains;
+   }
+   }
+
+   for (i = 0; i < args->count_handles; i++) {
+   struct dma_fence *fence = dma_fence_get_stub();
+
+   drm_syncobj_add_point(syncobjs[i], chains[i],
+ fence, points[i]);
+   dma_fence_put(fence);
+   }
+err_chains:
+   kfree(chains);
+err_points:
+   kfree(points);
+out:
+   drm_syncobj_array_free(syncobjs, args->count_handles);
+
+   return ret;
+}
+
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
struct drm_file 

[PATCH 9/9] drm/amdgpu: update version for timeline syncobj support in amdgpu

2019-03-25 Thread Chunming Zhou
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8a0732088640..4d8db87048d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -74,9 +74,10 @@
  * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
  * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
  * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
+ * - 3.31.0 - Add syncobj timeline support to AMDGPU_CS.
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   30
+#define KMS_DRIVER_MINOR   31
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit = 0;
-- 
2.17.1


[PATCH 2/9] drm/syncobj: add new drm_syncobj_add_point interface v4

2019-03-25 Thread Chunming Zhou
From: Christian König 

Use the dma_fence_chain object to create a timeline of fence objects
instead of just replacing the existing fence.

v2: rebase and cleanup
v3: fix garbage collection parameters
v4: add unordered point check, print a warning calltrace

Signed-off-by: Christian König 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_syncobj.c | 39 +++
 include/drm/drm_syncobj.h |  5 +
 2 files changed, 44 insertions(+)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 5329e66598c6..19a9ce638119 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -122,6 +122,45 @@ static void drm_syncobj_remove_wait(struct drm_syncobj 
*syncobj,
spin_unlock(&syncobj->lock);
 }
 
+/**
+ * drm_syncobj_add_point - add new timeline point to the syncobj
+ * @syncobj: sync object to add the timeline point to
+ * @chain: chain node to use to add the point
+ * @fence: fence to encapsulate in the chain node
+ * @point: sequence number to use for the point
+ *
+ * Add the chain node as new timeline point to the syncobj.
+ */
+void drm_syncobj_add_point(struct drm_syncobj *syncobj,
+  struct dma_fence_chain *chain,
+  struct dma_fence *fence,
+  uint64_t point)
+{
+   struct syncobj_wait_entry *cur, *tmp;
+   struct dma_fence *prev;
+
+   dma_fence_get(fence);
+
+   spin_lock(&syncobj->lock);
+
+   prev = drm_syncobj_fence_get(syncobj);
+   /* You are adding an unordered point to the timeline, which could cause
the payload returned from query_ioctl to be 0! */
+   WARN_ON_ONCE(prev && prev->seqno >= point);
+   dma_fence_chain_init(chain, prev, fence, point);
+   rcu_assign_pointer(syncobj->fence, &chain->base);
+
+   list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
+   list_del_init(&cur->node);
+   syncobj_wait_syncobj_func(syncobj, cur);
+   }
+   spin_unlock(&syncobj->lock);
+
+   /* Walk the chain once to trigger garbage collection */
+   dma_fence_chain_for_each(fence, prev);
+   dma_fence_put(prev);
+}
+EXPORT_SYMBOL(drm_syncobj_add_point);
+
 /**
  * drm_syncobj_replace_fence - replace fence in a sync object.
  * @syncobj: Sync object to replace fence in
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 0311c9fdbd2f..6cf7243a1dc5 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -27,6 +27,7 @@
 #define __DRM_SYNCOBJ_H__
 
 #include 
+#include 
 
 struct drm_file;
 
@@ -112,6 +113,10 @@ drm_syncobj_fence_get(struct drm_syncobj *syncobj)
 
 struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
 u32 handle);
+void drm_syncobj_add_point(struct drm_syncobj *syncobj,
+  struct dma_fence_chain *chain,
+  struct dma_fence *fence,
+  uint64_t point);
 void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
   struct dma_fence *fence);
 int drm_syncobj_find_fence(struct drm_file *file_private,
-- 
2.17.1


[PATCH 7/9] drm/syncobj: add transition iotcls between binary and timeline v2

2019-03-25 Thread Chunming Zhou
we need to import/export timeline points.

v2: unify to one transfer ioctl
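
For context, moving a timeline point into a binary syncobj would then look
roughly like this from user space (sketch only; field names follow the
drm_syncobj_transfer struct added by this patch):

    /* Sketch: copy the fence behind src_point of a timeline syncobj into a
     * binary syncobj; dst_point == 0 selects the binary path in the kernel. */
    static int transfer_point_to_binary(int fd, uint32_t src, uint64_t src_point,
                                        uint32_t dst)
    {
            struct drm_syncobj_transfer args;

            memset(&args, 0, sizeof(args));
            args.src_handle = src;
            args.dst_handle = dst;
            args.src_point = src_point;
            args.dst_point = 0;

            return drmIoctl(fd, DRM_IOCTL_SYNCOBJ_TRANSFER, &args);
    }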

Signed-off-by: Chunming Zhou 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_internal.h |  2 +
 drivers/gpu/drm/drm_ioctl.c|  2 +
 drivers/gpu/drm/drm_syncobj.c  | 74 ++
 include/uapi/drm/drm.h | 10 +
 4 files changed, 88 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 695179bb88dc..dd11ae5f1eef 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -180,6 +180,8 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, 
void *data,
   struct drm_file *file_private);
 int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
   struct drm_file *file_private);
+int drm_syncobj_transfer_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file_private);
 int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
   struct drm_file *file_private);
 int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7a534c184e52..92b3b7b2fd81 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -686,6 +686,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, 
drm_syncobj_fd_to_handle_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TRANSFER, drm_syncobj_transfer_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, 
drm_syncobj_timeline_wait_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 087fd4e7eaf3..ee2d66e047e7 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -679,6 +679,80 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, 
void *data,
&args->handle);
 }
 
+static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private,
+   struct drm_syncobj_transfer *args)
+{
+   struct drm_syncobj *timeline_syncobj = NULL;
+   struct dma_fence *fence;
+   struct dma_fence_chain *chain;
+   int ret;
+
+   timeline_syncobj = drm_syncobj_find(file_private, args->dst_handle);
+   if (!timeline_syncobj) {
+   return -ENOENT;
+   }
+   ret = drm_syncobj_find_fence(file_private, args->src_handle,
+args->src_point, args->flags,
+&fence);
+   if (ret)
+   goto err;
+   chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+   if (!chain) {
+   ret = -ENOMEM;
+   goto err1;
+   }
+   drm_syncobj_add_point(timeline_syncobj, chain, fence, args->dst_point);
+err1:
+   dma_fence_put(fence);
+err:
+   drm_syncobj_put(timeline_syncobj);
+
+   return ret;
+}
+
+static int
+drm_syncobj_transfer_to_binary(struct drm_file *file_private,
+  struct drm_syncobj_transfer *args)
+{
+   struct drm_syncobj *binary_syncobj = NULL;
+   struct dma_fence *fence;
+   int ret;
+
+   binary_syncobj = drm_syncobj_find(file_private, args->dst_handle);
+   if (!binary_syncobj)
+   return -ENOENT;
+   ret = drm_syncobj_find_fence(file_private, args->src_handle,
+args->src_point, args->flags, &fence);
+   if (ret)
+   goto err;
+   drm_syncobj_replace_fence(binary_syncobj, fence);
+   dma_fence_put(fence);
+err:
+   drm_syncobj_put(binary_syncobj);
+
+   return ret;
+}
+int
+drm_syncobj_transfer_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file_private)
+{
+   struct drm_syncobj_transfer *args = data;
+   int ret;
+
+   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   return -ENODEV;
+
+   if (args->pad)
+   return -EINVAL;
+
+   if (args->dst_point)
+   ret = drm_syncobj_transfer_to_timeline(file_private, args);
+   else
+   ret = drm_syncobj_transfer_to_binary(file_private, args);
+
+   return ret;
+}
+
 static void syncobj_wait_fence_func(struct dma_fence *fence,
struct dma_fence_cb *cb)
 {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index c62be0840ba5..e8d0d6b51875 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -735,6 +735,15 @@ struct drm_syncobj_handle {
__u32 pad;
 };
 
+struct 

[PATCH 6/9] drm/amdgpu: add timeline support in amdgpu CS v3

2019-03-25 Thread Chunming Zhou
syncobj wait/signal operations are appended to the command submission.
v2: separate to two kinds in/out_deps functions
v3: fix checking for timeline syncobj
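
For context, a submission that waits for a timeline point would add a chunk
along these lines (sketch only; it assumes the drm_amdgpu_cs_chunk_syncobj
layout with handle/flags/point introduced by this patch):

    /* Sketch: fill a timeline-wait dependency chunk for amdgpu CS. */
    static void fill_timeline_wait_chunk(struct drm_amdgpu_cs_chunk *chunk,
                                         struct drm_amdgpu_cs_chunk_syncobj *dep,
                                         uint32_t syncobj_handle, uint64_t point)
    {
            memset(dep, 0, sizeof(*dep));
            dep->handle = syncobj_handle;
            dep->point = point;
            dep->flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;

            chunk->chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT;
            chunk->length_dw = sizeof(*dep) / 4;
            chunk->chunk_data = (uintptr_t)dep;
    }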

Signed-off-by: Chunming Zhou 
Cc: Tobias Hector 
Cc: Jason Ekstrand 
Cc: Dave Airlie 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 152 +
 include/uapi/drm/amdgpu_drm.h  |   8 ++
 3 files changed, 144 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8d0d7f3dd5fb..deec2c796253 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -433,6 +433,12 @@ struct amdgpu_cs_chunk {
void *kdata;
 };
 
+struct amdgpu_cs_post_dep {
+   struct drm_syncobj *syncobj;
+   struct dma_fence_chain *chain;
+   u64 point;
+};
+
 struct amdgpu_cs_parser {
struct amdgpu_device *adev;
struct drm_file *filp;
@@ -462,8 +468,8 @@ struct amdgpu_cs_parser {
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
 
-   unsigned num_post_dep_syncobjs;
-   struct drm_syncobj **post_dep_syncobjs;
+   unsigned num_post_deps;
+   struct amdgpu_cs_post_dep   *post_deps;
 };
 
 static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 52a5e4fdc95b..2f6239b6be6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -215,6 +215,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
break;
 
default:
@@ -804,9 +806,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser 
*parser, int error,
ttm_eu_backoff_reservation(&parser->ticket,
   &parser->validated);
 
-   for (i = 0; i < parser->num_post_dep_syncobjs; i++)
-   drm_syncobj_put(parser->post_dep_syncobjs[i]);
-   kfree(parser->post_dep_syncobjs);
+   for (i = 0; i < parser->num_post_deps; i++) {
+   drm_syncobj_put(parser->post_deps[i].syncobj);
+   kfree(parser->post_deps[i].chain);
+   }
+   kfree(parser->post_deps);
 
dma_fence_put(parser->fence);
 
@@ -1117,13 +1121,18 @@ static int amdgpu_cs_process_fence_dep(struct 
amdgpu_cs_parser *p,
 }
 
 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
-uint32_t handle)
+uint32_t handle, u64 point,
+u64 flags)
 {
-   int r;
struct dma_fence *fence;
-   r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
-   if (r)
+   int r;
+
+   r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+   if (r) {
+   DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
+ handle, point, r);
return r;
+   }
 
r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
dma_fence_put(fence);
@@ -1134,46 +1143,118 @@ static int 
amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
 {
+   struct drm_amdgpu_cs_chunk_sem *deps;
unsigned num_deps;
int i, r;
-   struct drm_amdgpu_cs_chunk_sem *deps;
 
deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_sem);
+   for (i = 0; i < num_deps; ++i) {
+   r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
+ 0, 0);
+   if (r)
+   return r;
+   }
+
+   return 0;
+}
+
 
+static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser 
*p,
+struct amdgpu_cs_chunk 
*chunk)
+{
+   struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+   unsigned num_deps;
+   int i, r;
+
+   syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_syncobj);
for (i = 0; i < num_deps; ++i) {
-   r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);

[PATCH 5/9] drm/syncobj: use the timeline point in drm_syncobj_find_fence v4

2019-03-25 Thread Chunming Zhou
From: Christian König 

Implement finding the right timeline point in drm_syncobj_find_fence.

v2: return -EINVAL when the point is not submitted yet.
v3: fix reference counting bug, add flags handling as well
v4: add timeout for find fence

Signed-off-by: Christian König 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_syncobj.c | 50 ---
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 0e62a793c8dd..087fd4e7eaf3 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -213,6 +213,8 @@ static void drm_syncobj_assign_null_handle(struct 
drm_syncobj *syncobj)
dma_fence_put(fence);
 }
 
+/* 5s default for wait submission */
+#define DRM_SYNCOBJ_WAIT_FOR_SUBMIT_TIMEOUT 5000000000ULL
 /**
  * drm_syncobj_find_fence - lookup and reference the fence in a sync object
  * @file_private: drm file private pointer
@@ -233,16 +235,58 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
   struct dma_fence **fence)
 {
struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
-   int ret = 0;
+   struct syncobj_wait_entry wait;
+   u64 timeout = nsecs_to_jiffies64(DRM_SYNCOBJ_WAIT_FOR_SUBMIT_TIMEOUT);
+   int ret;
 
if (!syncobj)
return -ENOENT;
 
*fence = drm_syncobj_fence_get(syncobj);
-   if (!*fence) {
+   drm_syncobj_put(syncobj);
+
+   if (*fence) {
+   ret = dma_fence_chain_find_seqno(fence, point);
+   if (!ret)
+   return 0;
+   dma_fence_put(*fence);
+   } else {
ret = -EINVAL;
}
-   drm_syncobj_put(syncobj);
+
+   if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
+   return ret;
+
+   memset(&wait, 0, sizeof(wait));
+   wait.task = current;
+   wait.point = point;
+   drm_syncobj_fence_add_wait(syncobj, &wait);
+
+   do {
+   set_current_state(TASK_INTERRUPTIBLE);
+   if (wait.fence) {
+   ret = 0;
+   break;
+   }
+if (timeout == 0) {
+ret = -ETIME;
+break;
+}
+
+   if (signal_pending(current)) {
+   ret = -ERESTARTSYS;
+   break;
+   }
+
+timeout = schedule_timeout(timeout);
+   } while (1);
+
+   __set_current_state(TASK_RUNNING);
+   *fence = wait.fence;
+
+   if (wait.node.next)
+   drm_syncobj_remove_wait(syncobj, &wait);
+
return ret;
 }
 EXPORT_SYMBOL(drm_syncobj_find_fence);
-- 
2.17.1


[PATCH 1/9] dma-buf: add new dma_fence_chain container v7

2019-03-25 Thread Chunming Zhou
From: Christian König 

Lockless container implementation similar to a dma_fence_array, but with
only two elements per node and automatic garbage collection.

v2: properly document dma_fence_chain_for_each, add dma_fence_chain_find_seqno,
drop prev reference during garbage collection if it's not a chain fence.
v3: use head and iterator for dma_fence_chain_for_each
v4: fix reference count in dma_fence_chain_enable_signaling
v5: fix iteration when walking each chain node
v6: add __rcu for member 'prev' of struct chain node
v7: fix rcu warnings from kernel robot
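
For readers new to the container, the consumer pattern used later in this
series boils down to the following sketch (reference counting details
omitted):

    /* Sketch: find the node for a given point, then walk the rest. */
    static void inspect_timeline(struct dma_fence *fence, u64 seqno)
    {
            struct dma_fence *iter;

            /* Advance `fence` to the chain node that will signal `seqno`;
             * seqno == 0 is a no-op, an unknown seqno returns -EINVAL. */
            if (dma_fence_chain_find_seqno(&fence, seqno))
                    return;

            /* Walk the remaining nodes; nodes that are already signaled are
             * garbage collected as a side effect of the walk. */
            dma_fence_chain_for_each(iter, fence) {
                    /* inspect or wait on `iter` here */
            }
    }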

Signed-off-by: Christian König 
Cc: Lionel Landwerlin 
---
 drivers/dma-buf/Makefile  |   3 +-
 drivers/dma-buf/dma-fence-chain.c | 241 ++
 include/linux/dma-fence-chain.h   |  81 ++
 3 files changed, 324 insertions(+), 1 deletion(-)
 create mode 100644 drivers/dma-buf/dma-fence-chain.c
 create mode 100644 include/linux/dma-fence-chain.h

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 0913a6ccab5a..1f006e083eb9 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,4 +1,5 @@
-obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
+obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
+reservation.o seqno-fence.o
 obj-$(CONFIG_SYNC_FILE)+= sync_file.o
 obj-$(CONFIG_SW_SYNC)  += sw_sync.o sync_debug.o
 obj-$(CONFIG_UDMABUF)  += udmabuf.o
diff --git a/drivers/dma-buf/dma-fence-chain.c 
b/drivers/dma-buf/dma-fence-chain.c
new file mode 100644
index ..c729f98a7bd3
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -0,0 +1,241 @@
+/*
+ * fence-chain: chain fences together in a timeline
+ *
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ * Authors:
+ * Christian König 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include 
+
+static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
+
+/**
+ * dma_fence_chain_get_prev - use RCU to get a reference to the previous fence
+ * @chain: chain node to get the previous node from
+ *
+ * Use dma_fence_get_rcu_safe to get a reference to the previous fence of the
+ * chain node.
+ */
+static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain 
*chain)
+{
+   struct dma_fence *prev;
+
+   rcu_read_lock();
+   prev = dma_fence_get_rcu_safe(&chain->prev);
+   rcu_read_unlock();
+   return prev;
+}
+
+/**
+ * dma_fence_chain_walk - chain walking function
+ * @fence: current chain node
+ *
+ * Walk the chain to the next node. Returns the next fence or NULL if we are at
+ * the end of the chain. Garbage collects chain nodes which are already
+ * signaled.
+ */
+struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
+{
+   struct dma_fence_chain *chain, *prev_chain;
+   struct dma_fence *prev, *replacement, *tmp;
+
+   chain = to_dma_fence_chain(fence);
+   if (!chain) {
+   dma_fence_put(fence);
+   return NULL;
+   }
+
+   while ((prev = dma_fence_chain_get_prev(chain))) {
+
+   prev_chain = to_dma_fence_chain(prev);
+   if (prev_chain) {
+   if (!dma_fence_is_signaled(prev_chain->fence))
+   break;
+
+   replacement = dma_fence_chain_get_prev(prev_chain);
+   } else {
+   if (!dma_fence_is_signaled(prev))
+   break;
+
+   replacement = NULL;
+   }
+
+   tmp = cmpxchg((void **)&chain->prev, (void *)prev, (void *)replacement);
+   if (tmp == prev)
+   dma_fence_put(tmp);
+   else
+   dma_fence_put(replacement);
+   dma_fence_put(prev);
+   }
+
+   dma_fence_put(fence);
+   return prev;
+}
+EXPORT_SYMBOL(dma_fence_chain_walk);
+
+/**
+ * dma_fence_chain_find_seqno - find fence chain node by seqno
+ * @pfence: pointer to the chain node where to start
+ * @seqno: the sequence number to search for
+ *
+ * Advance the fence pointer to the chain node which will signal this sequence
+ * number. If no sequence number is provided then this is a no-op.
+ *
+ * Returns EINVAL if the fence is not a chain node or the sequence number has
+ * not yet advanced far enough.
+ */
+int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
+{
+   struct dma_fence_chain *chain;
+
+   if (!seqno)
+   return 0;
+
+   chain = 

[PATCH 4/9] drm/syncobj: add timeline payload query ioctl v6

2019-03-25 Thread Chunming Zhou
user mode can query timeline payload.
v2: check return value of copy_to_user
v3: handle querying entry by entry
v4: rebase on new chain container, simplify interface
v5: query last signaled timeline point, not last point.
v6: add unordered point check

Signed-off-by: Chunming Zhou 
Cc: Tobias Hector 
Cc: Jason Ekstrand 
Cc: Dave Airlie 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_internal.h |  2 ++
 drivers/gpu/drm/drm_ioctl.c|  2 ++
 drivers/gpu/drm/drm_syncobj.c  | 62 ++
 include/uapi/drm/drm.h | 10 ++
 4 files changed, 76 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 331ac6225b58..695179bb88dc 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -188,6 +188,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void 
*data,
struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_private);
+int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file_private);
 
 /* drm_framebuffer.c */
 void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index c984654646fa..7a534c184e52 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -694,6 +694,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 
DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, 
drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, 
DRM_MASTER|DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index eae51978cda4..0e62a793c8dd 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1064,3 +1064,65 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,
 
return ret;
 }
+
+int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file_private)
+{
+   struct drm_syncobj_timeline_array *args = data;
+   struct drm_syncobj **syncobjs;
+   uint64_t __user *points = u64_to_user_ptr(args->points);
+   uint32_t i;
+   int ret;
+
+   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   return -ENODEV;
+
+   if (args->pad != 0)
+   return -EINVAL;
+
+   if (args->count_handles == 0)
+   return -EINVAL;
+
+   ret = drm_syncobj_array_find(file_private,
+u64_to_user_ptr(args->handles),
+args->count_handles,
+&syncobjs);
+   if (ret < 0)
+   return ret;
+
+   for (i = 0; i < args->count_handles; i++) {
+   struct dma_fence_chain *chain;
+   struct dma_fence *fence;
+   uint64_t point;
+
+   fence = drm_syncobj_fence_get(syncobjs[i]);
+   chain = to_dma_fence_chain(fence);
+   if (chain) {
+   struct dma_fence *iter, *last_signaled = NULL;
+
+   dma_fence_chain_for_each(iter, fence) {
+   if (!iter)
+   break;
+   dma_fence_put(last_signaled);
+   last_signaled = dma_fence_get(iter);
+   if (!to_dma_fence_chain(last_signaled)->prev_seqno)
+   /* It is most likely that the timeline has
+* unordered points. */
+   break;
+   }
+   point = dma_fence_is_signaled(last_signaled) ?
+   last_signaled->seqno :
+   to_dma_fence_chain(last_signaled)->prev_seqno;
+   dma_fence_put(last_signaled);
+   } else {
+   point = 0;
+   }
+   ret = copy_to_user(&points[i], &point, sizeof(uint64_t));
+   ret = ret ? -EFAULT : 0;
+   if (ret)
+   break;
+   }
+   drm_syncobj_array_free(syncobjs, args->count_handles);
+
+   return ret;
+}
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 44ebcdd9bd1d..c62be0840ba5 100644
--- a/include/uapi/drm/drm.h
+++ 

[PATCH 3/9] drm/syncobj: add support for timeline point wait v8

2019-03-25 Thread Chunming Zhou
points array is a one-to-one match with the syncobjs array.
v2:
add separate ioctl for timeline point wait, otherwise it would break the uapi.
v3:
userspace can specify two kinds of waits:
a. wait for a time point to be completed.
b. wait for a time point to become available
v4:
rebase
v5:
add comment for xxx_WAIT_AVAILABLE
v6: rebase and rework on new container
v7: drop _WAIT_COMPLETED, it is the default anyway
v8: correctly handle garbage collected fences

Signed-off-by: Chunming Zhou 
Signed-off-by: Christian König 
Cc: Tobias Hector 
Cc: Jason Ekstrand 
Cc: Dave Airlie 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_internal.h |   2 +
 drivers/gpu/drm/drm_ioctl.c|   2 +
 drivers/gpu/drm/drm_syncobj.c  | 153 ++---
 include/uapi/drm/drm.h |  15 
 4 files changed, 143 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 251d67e04c2d..331ac6225b58 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -182,6 +182,8 @@ int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, 
void *data,
   struct drm_file *file_private);
 int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
   struct drm_file *file_private);
+int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file_private);
 int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 687943df58e1..c984654646fa 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -688,6 +688,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, 
drm_syncobj_timeline_wait_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 19a9ce638119..eae51978cda4 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -61,6 +61,7 @@ struct syncobj_wait_entry {
struct task_struct *task;
struct dma_fence *fence;
struct dma_fence_cb fence_cb;
+   u64 point;
 };
 
 static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
@@ -95,6 +96,8 @@ EXPORT_SYMBOL(drm_syncobj_find);
 static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj,
   struct syncobj_wait_entry *wait)
 {
+   struct dma_fence *fence;
+
if (wait->fence)
return;
 
@@ -103,11 +106,15 @@ static void drm_syncobj_fence_add_wait(struct drm_syncobj 
*syncobj,
 * have the lock, try one more time just to be sure we don't add a
 * callback when a fence has already been set.
 */
-   if (syncobj->fence)
-   wait->fence = dma_fence_get(
-   rcu_dereference_protected(syncobj->fence, 1));
-   else
+   fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1));
+   if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) {
+   dma_fence_put(fence);
+   list_add_tail(&wait->node, &syncobj->cb_list);
+   } else if (!fence) {
+   wait->fence = dma_fence_get_stub();
+   } else {
+   wait->fence = fence;
+   }
spin_unlock(&syncobj->lock);
 }
 
@@ -149,10 +156,8 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj,
dma_fence_chain_init(chain, prev, fence, point);
rcu_assign_pointer(syncobj->fence, &chain->base);
 
-   list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
-   list_del_init(&cur->node);
+   list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
syncobj_wait_syncobj_func(syncobj, cur);
-   }
spin_unlock(&syncobj->lock);
 
/* Walk the chain once to trigger garbage collection */
@@ -184,10 +189,8 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
rcu_assign_pointer(syncobj->fence, fence);
 
if (fence != old_fence) {
-   list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
-   list_del_init(&cur->node);
+   list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
syncobj_wait_syncobj_func(syncobj, cur);
-   }
}
 
spin_unlock(&syncobj->lock);
@@ -644,13 +647,27 @@ static void syncobj_wait_fence_func(struct dma_fence 

[bug report] drm/amd/display: Link train only when link is DP and backend is enabled

2019-03-25 Thread Dan Carpenter
Hello Samson Tam,

This is a semi-automatic email about new static checker warnings.

The patch 66acd4418d7d: "drm/amd/display: Link train only when link
is DP and backend is enabled" from Mar 4, 2019, leads to the
following Smatch complaint:

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c:553 
dc_link_set_preferred_link_settings()
warn: variable dereferenced before check 'link_stream' (see line 550)

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c
   540  }
   541  }
   542  
   543  /* Stream not found */
   544  if (i == MAX_PIPES)
   545  return;
   546  
   547  link_stream = 
link->dc->current_state->res_ctx.pipe_ctx[i].stream;
   548  
   549  /* Cannot retrain link if backend is off */
   550  if (link_stream->dpms_off)
^
New check

   551  return;
   552  
   553  if (link_stream)
^^^
The old code used to check for NULL
   554  decide_link_settings(link_stream, &store_settings);
   555  
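
A minimal way to address this (sketch only, not necessarily the fix that will
land) is to do the NULL check before the dpms_off dereference:

    link_stream = link->dc->current_state->res_ctx.pipe_ctx[i].stream;

    /* NULL check first, then the backend-off test. */
    if (link_stream) {
            if (link_stream->dpms_off)
                    return;
            decide_link_settings(link_stream, &store_settings);
    }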

regards,
dan carpenter

[PATCH v3] drm/amdgpu: add more inject control

2019-03-25 Thread Pan, Xinhui

The TA accepts some options, e.g. whether the address is in SRAM or VRAM.
The default options are usually enough, but allow the user to set them up.

Reuse inject.value to carry these options.

At the same time, we need to translate the address to a physical/GPU
address at which the pages are mapped.

V2: add a helper to makeup TA data
V3: change magic number to 0xa5.
 
Signed-off-by: xinhui pan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 108 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  20 -
 2 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 469cb6477b8e..2b7a420d5a1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -121,6 +121,21 @@ const char *ras_block_string[] = {
 #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
 
+static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
+   uint64_t offset, uint64_t size,
+   struct amdgpu_bo **bo_ptr);
+
+static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
+   struct amdgpu_bo **bo_ptr);
+
+static int amdgpu_ras_translate_addr(struct amdgpu_device *adev,
+   u64 *gpu_addr, int addr_type)
+{
+   /* TODO */
+
+   return -EINVAL;
+}
+
 static void amdgpu_ras_self_test(struct amdgpu_device *adev)
 {
/* TODO */
@@ -176,6 +191,53 @@ static int amdgpu_ras_find_block_id_by_name(const char 
*name, int *block_id)
return -EINVAL;
 }
 
+static bool amdgpu_ras_extract_inject_data(const struct ras_inject_if *inject,
+   u64 *address, u64 *value, int *addr_type, int *inject_type)
+{
+   const struct inject_data *data = &inject->data;
+
+   if (data->magic != AMDGPU_RAS_INJECT_MAGIC ||
+   data->u8_reserved ||
+   data->u32_reserved) {
+   *address = inject->address;
+   *value = inject->value;
+
+   return false;
+   }
+
+   *address = inject->address;
+   *value = data->value;
+   *addr_type = data->addr_type;
+   *inject_type = data->inject_type;
+
+   return true;
+}
+
+/* The definition of ras_inject_if's member is different in different blocks. */
+static void amdgpu_ras_makeup_ta_inject_data(struct ras_inject_if *inject,
+   u64 address, u64 value, int addr_type, int inject_type)
+{
+   switch (inject->head.block) {
+   case AMDGPU_RAS_BLOCK__UMC:
+   /* 0 is vram, 1 is sram. */
+   inject->head.sub_block_index = addr_type;
+   /* not documented yet. */
+   inject->value = inject_type;
+   inject->address = address;
+   break;
+   case AMDGPU_RAS_BLOCK__SDMA ... AMDGPU_RAS_BLOCK__FUSE:
+   inject->address = address;
+   inject->value = value;
+   break;
+   default:
+   WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n",
+   inject->head.block);
+   inject->address = address;
+   inject->value = value;
+   break;
+   };
+}
+
 static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
const char __user *buf, size_t size,
loff_t *pos, struct ras_debug_if *data)
@@ -300,7 +362,14 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file 
*f, const char __user *
 {
struct amdgpu_device *adev = (struct amdgpu_device 
*)file_inode(f)->i_private;
struct ras_debug_if data;
+   struct amdgpu_bo *bo = NULL;
int ret = 0;
+   u64 address;
+   u64 value;
+   int addr_type = 0;
+   int inject_type;
+
+   BUILD_BUG_ON(sizeof(struct inject_data) > sizeof(uint64_t));
 
ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
if (ret)
@@ -317,7 +386,44 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file 
*f, const char __user *
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
break;
case 2:
-   ret = amdgpu_ras_error_inject(adev, &data.inject);
+   do {
+   /* In general, user can make up this inject data
+* by themself. But each IP might have their own
+* definitions, so handle such trivial but
+* important things in driver. We reuse inject.value
+* to place these control fields. To not break old
+* user applications, keep its size unchanged.
+*/
+   if (amdgpu_ras_extract_inject_data(&data.inject,
+   &address, &value,
+   &addr_type, &inject_type))
+   amdgpu_ras_makeup_ta_inject_data(&data.inject,
+   address, value,
+ 

[PATCH libdrm] amdgpu: alloc vram for inject

2019-03-25 Thread Pan, Xinhui

Injection needs a real, valid address.

Signed-off-by: xinhui pan 
---
 tests/amdgpu/ras_tests.c | 53 +++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/tests/amdgpu/ras_tests.c b/tests/amdgpu/ras_tests.c
index 612ad1d7..d3f1750d 100644
--- a/tests/amdgpu/ras_tests.c
+++ b/tests/amdgpu/ras_tests.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "xf86drm.h"
 
 const char *ras_block_string[] = {
@@ -452,6 +453,47 @@ static int amdgpu_ras_query_err_count(enum 
amdgpu_ras_block block,
return 0;
 }
 
+typedef struct ras_inject_internal {
+   amdgpu_bo_handle bo_handle;
+   amdgpu_va_handle va_handle;
+   void *cpu;
+   uint64_t mc;
+} amdgpu_ras_handle;
+
+static int amdgpu_ras_alloc_vram_page(uint64_t *mc_address, void **handle)
+{
+   amdgpu_ras_handle h;
+   int r;
+
+   *handle = malloc(sizeof(amdgpu_ras_handle));
+   if (!*handle)
+   return -1;
+
+   r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
+   AMDGPU_GEM_DOMAIN_VRAM, 0,
+   &h.bo_handle, &h.cpu,
+   &h.mc, &h.va_handle);
+   if (r) {
+   free(*handle);
+   return r;
+   }
+
+   memset(h.cpu, 0, 4096);
+   memcpy(*handle, , sizeof(h));
+   *mc_address = h.mc;
+
+   return 0;
+}
+
+static void amdgpu_ras_free_vram_page(void *handle)
+{
+   amdgpu_ras_handle h = *(amdgpu_ras_handle *)handle;
+
+   free(handle);
+
+   amdgpu_bo_unmap_and_free(h.bo_handle, h.va_handle, h.mc, 4096);
+}
+
 //tests
 static void amdgpu_ras_features_test(int enable)
 {
@@ -508,6 +550,14 @@ static void __amdgpu_ras_inject_test(void)
int ret;
int i;
unsigned long ue, ce, ue_old, ce_old;
+   uint64_t mc;
+   void *handle;
+
+   ret = amdgpu_ras_alloc_vram_page(&mc, &handle);
+   CU_ASSERT_EQUAL(ret, 0);
+
+   if (ret)
+   return;
 
data.op = 2;
for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
@@ -519,7 +569,7 @@ static void __amdgpu_ras_inject_test(void)
.sub_block_index = 0,
.name = "",
},
-   .address = 0,
+   .address = mc,
.value = 0,
};
 
@@ -563,6 +613,7 @@ loop:
CU_ASSERT_EQUAL(ue_old + 1, ue);
CU_ASSERT_EQUAL(ce_old, ce);
}
+   amdgpu_ras_free_vram_page(handle);
 }
 
 static void amdgpu_ras_inject_test(void)
-- 
2.17.1


[bug report] drm/amd/display: On DCN1, Wait for vupdate on cursor updates

2019-03-25 Thread Dan Carpenter
Hello David Francis,

The patch c6ade4ee7375: "drm/amd/display: On DCN1, Wait for vupdate
on cursor updates" from Feb 21, 2019, leads to the following static
checker warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stream.c:216 
delay_cursor_until_vupdate()
error: uninitialized symbol 'vpos'.

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stream.c
202 static void delay_cursor_until_vupdate(struct pipe_ctx *pipe_ctx, 
struct dc *dc)
203 {
204 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
205 unsigned int vupdate_line;
206 unsigned int lines_to_vupdate, us_to_vupdate, vpos, nvpos;
207 struct dc_stream_state *stream = pipe_ctx->stream;
208 unsigned int us_per_line;
209 
210 if (stream->ctx->asic_id.chip_family == FAMILY_RV &&
211 
ASIC_REV_IS_RAVEN(stream->ctx->asic_id.hw_internal_rev)) {
212 
213 vupdate_line = get_vupdate_offset_from_vsync(pipe_ctx);
 214 dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos);

Can return false on failure.

215 
--> 216 if (vpos >= vupdate_line)
217 return;
218 
 219 us_per_line = stream->timing.h_total * 10000 / stream->timing.pix_clk_100hz;
220 lines_to_vupdate = vupdate_line - vpos;
221 us_to_vupdate = lines_to_vupdate * us_per_line;
222 
223 /* 70 us is a conservative estimate of cursor update 
time*/
224 if (us_to_vupdate < 70)
225 udelay(us_to_vupdate);
226 }
227 #endif
228 }
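
A minimal fix along the lines the warning suggests (sketch only) would be to
honour the return value before vpos is used:

    vupdate_line = get_vupdate_offset_from_vsync(pipe_ctx);
    /* Bail out if the position query fails, so vpos is never
     * read uninitialized. */
    if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos))
            return;

    if (vpos >= vupdate_line)
            return;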

regards,
dan carpenter